From 84e20ea4953ea25ba8d228d405fb9e7ac0ccf20b Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Sun, 16 Feb 2025 15:37:36 +0100 Subject: [PATCH 1/7] gh-101178: C implementation of base64._a85encode Initially done to reduce the huge memory consumption of the previous implementation for large inputs, and because no memory-friendly Python approach was found that did not include a performance regression. This implementation also greatly improves performance in all cases. Signed-off-by: Romuald Brunet --- Lib/base64.py | 35 +++--------- Modules/binascii.c | 88 ++++++++++++++++++++++++++++ Modules/clinic/binascii.c.h | 111 +++++++++++++++++++++++++++++++++++- 3 files changed, 207 insertions(+), 27 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..1de57ef9cd97a3 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -298,27 +298,12 @@ def b16decode(s, casefold=False): def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): # Helper function for a85encode and b85encode + # chars2 is now unused if not isinstance(b, bytes_types): b = memoryview(b).tobytes() - padding = (-len(b)) % 4 - if padding: - b = b + b'\0' * padding - words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) - - chunks = [b'z' if foldnuls and not word else - b'y' if foldspaces and word == 0x20202020 else - (chars2[word // 614125] + - chars2[word // 85 % 7225] + - chars[word % 85]) - for word in words] - - if padding and not pad: - if chunks[-1] == b'z': - chunks[-1] = chars[0] * 5 - chunks[-1] = chunks[-1][:-padding] - - return b''.join(chunks) + return binascii.b2a_base85(b, chars=chars, pad=pad, + foldnuls=foldnuls, foldspaces=foldspaces) def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): """Encode bytes-like object b using Ascii85 and return a bytes object.
@@ -337,14 +322,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): adobe controls whether the encoded byte sequence is framed with <~ and ~>, which is used by the Adobe implementation. """ - global _a85chars, _a85chars2 + global _a85chars # Delay the initialization of tables to not waste memory # if the function is never called - if _a85chars2 is None: + if _a85chars is None: _a85chars = [bytes((i,)) for i in range(33, 118)] - _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] - result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) + result = _85encode(b, b''.join(_a85chars), None, pad, True, foldspaces) if adobe: result = _A85START + result @@ -445,13 +429,12 @@ def b85encode(b, pad=False): If pad is true, the input is padded with b'\\0' so its length is a multiple of 4 bytes before encoding. """ - global _b85chars, _b85chars2 + global _b85chars # Delay the initialization of tables to not waste memory # if the function is never called - if _b85chars2 is None: + if _b85chars is None: _b85chars = [bytes((i,)) for i in _b85alphabet] - _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] - return _85encode(b, _b85chars, _b85chars2, pad) + return _85encode(b, _b85alphabet, None, pad) def b85decode(b): """Decode the base85-encoded bytes-like object or ASCII string b diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..8d80abdb9268a1 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1239,6 +1239,93 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs, return rv; } +/*[clinic input] +binascii.b2a_base85 + + data: Py_buffer + chars: Py_buffer + pad: bool = False + foldnuls: bool = False + foldspaces: bool = False + +Utility method used by the base64 module to encode a85/b85 data + + data: bytes + chars: 85 bytes conversion table + pad: use NULL-paded input if necessary + foldnuls: replace NULL chunks by 'z' + foldspaces: replace space-only chucks by 'y' + 
+[clinic start generated code]*/ + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, + int pad, int foldnuls, int foldspaces) +/*[clinic end generated code: output=0a92b3c535580aa0 input=a2d8ae712ed5adba]*/ +{ + if (chars->len != 85) { + PyErr_SetString(PyExc_ValueError, + "chars must be exactly 85 bytes long"); + return NULL; + } + + _PyBytesWriter writer; + _PyBytesWriter_Init(&writer); + + const size_t bin_len = data->len; + + // Allocate up to maxium encoded length, adjusted at end + const size_t ascii_len = ((bin_len + 3) / 4) * 5; + + unsigned char *ascii_data = _PyBytesWriter_Alloc(&writer, ascii_len); + if (ascii_data == NULL) { + PyErr_NoMemory(); + return NULL; + } + + const unsigned char *table = chars->buf; + const unsigned char *bin_data = data->buf; + + size_t i, j; + for (i = 0; i < bin_len; i += 4) { + const size_t chunk_size = (bin_len - i >= 4) ? 4 : (bin_len - i); + + // translate chunk to 32bit integer + uint32_t value = 0; + for (j = 0; j < chunk_size; j++) { + value = (value << 8) | bin_data[i + j]; + } + value <<= (4 - chunk_size) * 8; + + if (foldnuls && value == 0) { + *ascii_data++ = 'z'; + } else if (foldspaces && value == 0x20202020) { + *ascii_data++ = 'y'; + } else { + for (j = 0; j < 5 ; j++) { + ascii_data[4 - j] = table[value % 85]; + value /= 85; + } + ascii_data += 5; + } + } + + // In case `i` went over the input size, we may need to shorten the output + const size_t overflow = (i - bin_len); + + if (overflow && !pad && foldnuls && ascii_data[-1] == 'z') { + ascii_data--; + memset(ascii_data, table[0], 5); + ascii_data += 5; + } + + if (!pad) { + ascii_data -= overflow; + } + + return _PyBytesWriter_Finish(&writer, ascii_data); +} + /* List of functions defined in the module */ static struct PyMethodDef binascii_module_methods[] = { @@ -1246,6 +1333,7 @@ static struct PyMethodDef binascii_module_methods[] = { BINASCII_B2A_UU_METHODDEF BINASCII_A2B_BASE64_METHODDEF 
BINASCII_B2A_BASE64_METHODDEF + BINASCII_B2A_BASE85_METHODDEF BINASCII_A2B_HEX_METHODDEF BINASCII_B2A_HEX_METHODDEF BINASCII_HEXLIFY_METHODDEF diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index f81f12c388f373..791db5864a8a4c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -774,4 +774,113 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=9ed7fbeec13c6606 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(binascii_b2a_base85__doc__, +"b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n" +" foldspaces=False)\n" +"--\n" +"\n" +"Utility method used by the base64 module to encode a85/b85 data\n" +"\n" +" data: bytes\n" +" chars: 85 bytes conversion table\n" +" pad: use NULL-paded input if necessary\n" +" foldnuls: replace NULL chunks by \'z\'\n" +" foldspaces: replace space-only chucks by \'y\'"); + +#define BINASCII_B2A_BASE85_METHODDEF \ + {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__}, + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, + int pad, int foldnuls, int foldspaces); + +static PyObject * +binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 5 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(data), &_Py_ID(chars), &_Py_ID(pad), &_Py_ID(foldnuls), &_Py_ID(foldspaces), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"data", 
"chars", "pad", "foldnuls", "foldspaces", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2; + Py_buffer data = {NULL, NULL}; + Py_buffer chars = {NULL, NULL}; + int pad = 0; + int foldnuls = 0; + int foldspaces = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 5, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (PyObject_GetBuffer(args[1], &chars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[2]) { + pad = PyObject_IsTrue(args[2]); + if (pad < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[3]) { + foldnuls = PyObject_IsTrue(args[3]); + if (foldnuls < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + foldspaces = PyObject_IsTrue(args[4]); + if (foldspaces < 0) { + goto exit; + } +skip_optional_pos: + return_value = binascii_b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + /* Cleanup for chars */ + if (chars.obj) { + PyBuffer_Release(&chars); + } + + return return_value; +} +/*[clinic end generated code: output=ae4488d2f300a0ff input=a9049054013a1b77]*/ From 74fc245760ddeff612f7866a4ced3afb77cafc13 Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Sun, 16 Feb 2025 15:38:26 +0100 Subject: [PATCH 2/7] Add possible regression test in test_base64 Regression was found while testing the new C implementation, when foldspaces was used with b85encode (since a chunk could end in z without having been folded) --- Lib/test/test_base64.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_base64.py 
b/Lib/test/test_base64.py index 409c8c109e885f..d06fd58d39b628 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -528,6 +528,7 @@ def test_b85encode(self): b"""0123456789!@#0^&*();:<>,. []{}""": b"""VPa!sWoBn+X=-b1ZEkOHadLBXb#`}nd3r%YLqtVJM@UIZOH55pPf$@(""" b"""Q&d$}S6EqEFflSSG&MFiI5{CeBQRbjDkv#CIy^osE+AW7dwl""", + b"paddu\xc7": b'aA9O*b;k', b'no padding..': b'Zf_uPVPs@!Zf7no', b'zero compression\x00\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG00000', b'zero compression\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG0000', From 60a3ae64c885a04907841522de8436633e58db6d Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Sun, 16 Feb 2025 16:43:26 +0100 Subject: [PATCH 3/7] Review changes Modules/binascii.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply suggestions Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Modules/binascii.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 8d80abdb9268a1..7b0885a5d088ae 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1299,9 +1299,11 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, if (foldnuls && value == 0) { *ascii_data++ = 'z'; - } else if (foldspaces && value == 0x20202020) { + } + else if (foldspaces && value == 0x20202020) { *ascii_data++ = 'y'; - } else { + } + else { for (j = 0; j < 5 ; j++) { ascii_data[4 - j] = table[value % 85]; value /= 85; From aaa09e16e3fd2d2e38ab63d041b957485113fbca Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Sun, 16 Feb 2025 20:33:57 +0100 Subject: [PATCH 4/7] Review fixes: update algorithm Inspired by git source https://github.com/git/git/blob/03944513488db4a81fdb4c21c3b515e4cb260b05/base85.c#L79 This avoids checking the chunk size on every iteration and thus improves performance --- Modules/binascii.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git
a/Modules/binascii.c b/Modules/binascii.c index 7b0885a5d088ae..97d75b9ef0414d 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1286,16 +1286,20 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, const unsigned char *table = chars->buf; const unsigned char *bin_data = data->buf; - size_t i, j; - for (i = 0; i < bin_len; i += 4) { - const size_t chunk_size = (bin_len - i >= 4) ? 4 : (bin_len - i); + size_t i = 0 ; + int padding = 0; - // translate chunk to 32bit integer + while (i < bin_len) { + // translate each 4 byte chunk to 32bit integer uint32_t value = 0; - for (j = 0; j < chunk_size; j++) { - value = (value << 8) | bin_data[i + j]; + for (int cnt = 24; cnt >= 0; cnt -= 8) { + value |= bin_data[i] << cnt; + if (++i == bin_len) { + // Number of bytes under the 4 bytes rounded value + padding = cnt / 8; + break; + } } - value <<= (4 - chunk_size) * 8; if (foldnuls && value == 0) { *ascii_data++ = 'z'; @@ -1304,7 +1308,7 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, *ascii_data++ = 'y'; } else { - for (j = 0; j < 5 ; j++) { + for (int j = 0; j < 5 ; j++) { ascii_data[4 - j] = table[value % 85]; value /= 85; } @@ -1312,17 +1316,14 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, } } - // In case `i` went over the input size, we may need to shorten the output - const size_t overflow = (i - bin_len); - - if (overflow && !pad && foldnuls && ascii_data[-1] == 'z') { + if (padding && !pad && foldnuls && ascii_data[-1] == 'z') { ascii_data--; memset(ascii_data, table[0], 5); ascii_data += 5; } if (!pad) { - ascii_data -= overflow; + ascii_data -= padding; } return _PyBytesWriter_Finish(&writer, ascii_data); From cb46a5db57abd8b12d790f454e07dbf1d4a244c5 Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Mon, 17 Feb 2025 08:27:44 +0100 Subject: [PATCH 5/7] Further plagiarize git's implementation Since j is not unsigned anymore we can reverse the table lookup loop
--- Modules/binascii.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 97d75b9ef0414d..ba1eb6ca61073c 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1308,8 +1308,8 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, *ascii_data++ = 'y'; } else { - for (int j = 0; j < 5 ; j++) { - ascii_data[4 - j] = table[value % 85]; + for (int j = 4; j >= 0; j--) { + ascii_data[j] = table[value % 85]; value /= 85; } ascii_data += 5; From c88450ba34e4ba9c4fbbe470a6c54a5a3bcf709d Mon Sep 17 00:00:00 2001 From: Romuald Brunet Date: Mon, 17 Feb 2025 22:47:45 +0100 Subject: [PATCH 6/7] Rename b2a_base85 as private for now --- Lib/base64.py | 4 ++-- Modules/binascii.c | 11 ++++++----- Modules/clinic/binascii.c.h | 23 ++++++++++++----------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 1de57ef9cd97a3..8ab27441ce482f 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -302,8 +302,8 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): if not isinstance(b, bytes_types): b = memoryview(b).tobytes() - return binascii.b2a_base85(b, chars=chars, pad=pad, - foldnuls=foldnuls, foldspaces=foldspaces) + return binascii._b2a_base85(b, chars=chars, pad=pad, + foldnuls=foldnuls, foldspaces=foldspaces) def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): """Encode bytes-like object b using Ascii85 and return a bytes object. 
diff --git a/Modules/binascii.c b/Modules/binascii.c index ba1eb6ca61073c..67a0954e1c1944 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1240,7 +1240,7 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs, } /*[clinic input] -binascii.b2a_base85 +binascii._b2a_base85 data: Py_buffer chars: Py_buffer @@ -1259,9 +1259,10 @@ Utility method used by the base64 module to encode a85/b85 data [clinic start generated code]*/ static PyObject * -binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, - int pad, int foldnuls, int foldspaces) -/*[clinic end generated code: output=0a92b3c535580aa0 input=a2d8ae712ed5adba]*/ +binascii__b2a_base85_impl(PyObject *module, Py_buffer *data, + Py_buffer *chars, int pad, int foldnuls, + int foldspaces) +/*[clinic end generated code: output=cefe84c300ad7314 input=3c8faf77b992dcc2]*/ { if (chars->len != 85) { PyErr_SetString(PyExc_ValueError, @@ -1336,7 +1337,7 @@ static struct PyMethodDef binascii_module_methods[] = { BINASCII_B2A_UU_METHODDEF BINASCII_A2B_BASE64_METHODDEF BINASCII_B2A_BASE64_METHODDEF - BINASCII_B2A_BASE85_METHODDEF + BINASCII__B2A_BASE85_METHODDEF BINASCII_A2B_HEX_METHODDEF BINASCII_B2A_HEX_METHODDEF BINASCII_HEXLIFY_METHODDEF diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 791db5864a8a4c..3e5a22b4ee4433 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -775,9 +775,9 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -PyDoc_STRVAR(binascii_b2a_base85__doc__, -"b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n" -" foldspaces=False)\n" +PyDoc_STRVAR(binascii__b2a_base85__doc__, +"_b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n" +" foldspaces=False)\n" "--\n" "\n" "Utility method used by the base64 module to encode a85/b85 data\n" @@ -788,15 +788,16 @@ PyDoc_STRVAR(binascii_b2a_base85__doc__, " foldnuls: replace NULL chunks 
by \'z\'\n" " foldspaces: replace space-only chucks by \'y\'"); -#define BINASCII_B2A_BASE85_METHODDEF \ - {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__}, +#define BINASCII__B2A_BASE85_METHODDEF \ + {"_b2a_base85", _PyCFunction_CAST(binascii__b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii__b2a_base85__doc__}, static PyObject * -binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, Py_buffer *chars, - int pad, int foldnuls, int foldspaces); +binascii__b2a_base85_impl(PyObject *module, Py_buffer *data, + Py_buffer *chars, int pad, int foldnuls, + int foldspaces); static PyObject * -binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +binascii__b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -820,7 +821,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P static const char * const _keywords[] = {"data", "chars", "pad", "foldnuls", "foldspaces", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "b2a_base85", + .fname = "_b2a_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -869,7 +870,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto exit; } skip_optional_pos: - return_value = binascii_b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces); + return_value = binascii__b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces); exit: /* Cleanup for data */ @@ -883,4 +884,4 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P return return_value; } -/*[clinic end generated code: output=ae4488d2f300a0ff input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a1f5ae9968e8e52d input=a9049054013a1b77]*/ From 2fc892cd05efbbaff2e0c4a171cdda155e26720a Mon Sep 17 00:00:00 
2001 From: Romuald Brunet Date: Mon, 17 Feb 2025 22:54:45 +0100 Subject: [PATCH 7/7] Credit to git's implementation and more comments --- Modules/binascii.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 67a0954e1c1944..bd67a656d2b8a2 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1290,8 +1290,9 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data, size_t i = 0 ; int padding = 0; + // Conversion largely inspired by git's base85 implementation while (i < bin_len) { - // translate each 4 byte chunk to 32bit integer + // Translate each 4 byte chunk to 32bit integer uint32_t value = 0; for (int cnt = 24; cnt >= 0; cnt -= 8) { value |= bin_data[i] << cnt; @@ -1302,6 +1303,7 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data, } } + // Handle NULL-only and space-only cases (specific to ASCII85) if (foldnuls && value == 0) { *ascii_data++ = 'z'; } @@ -1317,6 +1319,7 @@ binascii__b2a_base85_impl(PyObject *module, Py_buffer *data, } } + // Expand the last folded null in case it did not fill a full chunk if (padding && !pad && foldnuls && ascii_data[-1] == 'z') { ascii_data--; memset(ascii_data, table[0], 5);