diff --git a/Lib/base64.py b/Lib/base64.py
index 5d78cc09f40cd3..8ab27441ce482f 100644
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -298,27 +298,12 @@ def b16decode(s, casefold=False):
 
 def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     # Helper function for a85encode and b85encode
+    # chars2 is now unused
     if not isinstance(b, bytes_types):
         b = memoryview(b).tobytes()
 
-    padding = (-len(b)) % 4
-    if padding:
-        b = b + b'\0' * padding
-    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
-
-    chunks = [b'z' if foldnuls and not word else
-              b'y' if foldspaces and word == 0x20202020 else
-              (chars2[word // 614125] +
-               chars2[word // 85 % 7225] +
-               chars[word % 85])
-              for word in words]
-
-    if padding and not pad:
-        if chunks[-1] == b'z':
-            chunks[-1] = chars[0] * 5
-        chunks[-1] = chunks[-1][:-padding]
-
-    return b''.join(chunks)
+    return binascii._b2a_base85(b, chars=chars, pad=pad,
+                                foldnuls=foldnuls, foldspaces=foldspaces)
 
 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     """Encode bytes-like object b using Ascii85 and return a bytes object.
@@ -337,14 +322,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     adobe controls whether the encoded byte sequence is framed with <~ and ~>,
     which is used by the Adobe implementation.
     """
-    global _a85chars, _a85chars2
+    global _a85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _a85chars2 is None:
+    if _a85chars is None:
         _a85chars = [bytes((i,)) for i in range(33, 118)]
-        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
 
-    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
+    result = _85encode(b, b''.join(_a85chars), None, pad, True, foldspaces)
 
     if adobe:
         result = _A85START + result
@@ -445,13 +429,12 @@ def b85encode(b, pad=False):
     If pad is true, the input is padded with b'\\0' so its length is a multiple of
     4 bytes before encoding.
     """
-    global _b85chars, _b85chars2
+    global _b85chars
     # Delay the initialization of tables to not waste memory
     # if the function is never called
-    if _b85chars2 is None:
+    if _b85chars is None:
         _b85chars = [bytes((i,)) for i in _b85alphabet]
-        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
-    return _85encode(b, _b85chars, _b85chars2, pad)
+    return _85encode(b, _b85alphabet, None, pad)
 
 def b85decode(b):
     """Decode the base85-encoded bytes-like object or ASCII string b
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 409c8c109e885f..d06fd58d39b628 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -528,6 +528,7 @@ def test_b85encode(self):
             b"""0123456789!@#0^&*();:<>,. []{}""":
                 b"""VPa!sWoBn+X=-b1ZEkOHadLBXb#`}nd3r%YLqtVJM@UIZOH55pPf$@("""
                 b"""Q&d$}S6EqEFflSSG&MFiI5{CeBQRbjDkv#CIy^osE+AW7dwl""",
+            b"paddu\xc7": b'aA9O*b;k',
             b'no padding..': b'Zf_uPVPs@!Zf7no',
             b'zero compression\x00\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG00000',
             b'zero compression\x00\x00\x00': b'dS!BNAY*TBaB^jHb7^mG0000',
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 6bb01d148b6faa..bd67a656d2b8a2 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1239,6 +1239,103 @@ binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
     return rv;
 }
 
+/*[clinic input]
+binascii._b2a_base85
+
+    data: Py_buffer
+    chars: Py_buffer
+    pad: bool = False
+    foldnuls: bool = False
+    foldspaces: bool = False
+
+Utility method used by the base64 module to encode a85/b85 data
+
+    data: bytes
+    chars: 85 bytes conversion table
+    pad: use NULL-paded input if necessary
+    foldnuls: replace NULL chunks by 'z'
+    foldspaces: replace space-only chucks by 'y'
+
+[clinic start generated code]*/
+
+static PyObject *
+binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
+                          Py_buffer *chars, int pad, int foldnuls,
+                          int foldspaces)
+/*[clinic end generated code: output=cefe84c300ad7314 input=3c8faf77b992dcc2]*/
+{
+    if (chars->len != 85) {
+        PyErr_SetString(PyExc_ValueError,
+                        "chars must be exactly 85 bytes long");
+        return NULL;
+    }
+
+    const size_t bin_len = data->len;
+
+    // Every 4 byte chunk yields at most 5 characters: reject inputs whose
+    // encoded size would overflow the Py_ssize_t expected by the writer
+    if ((bin_len + 3) / 4 > (size_t)PY_SSIZE_T_MAX / 5) {
+        return PyErr_NoMemory();
+    }
+
+    // Allocate up to maximum encoded length, adjusted at end
+    const size_t ascii_len = ((bin_len + 3) / 4) * 5;
+
+    _PyBytesWriter writer;
+    _PyBytesWriter_Init(&writer);
+
+    unsigned char *ascii_data = _PyBytesWriter_Alloc(&writer, ascii_len);
+    if (ascii_data == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    const unsigned char *table = chars->buf;
+    const unsigned char *bin_data = data->buf;
+
+    size_t i = 0;
+    int padding = 0;
+
+    // Conversion largely inspired from git base85 implementation
+    while (i < bin_len) {
+        // Translate each 4 byte chunk to 32bit integer
+        uint32_t value = 0;
+        for (int cnt = 24; cnt >= 0; cnt -= 8) {
+            // Cast first: shifting the int-promoted byte by 24 can overflow
+            value |= (uint32_t)bin_data[i] << cnt;
+            if (++i == bin_len) {
+                // Number of bytes under the 4 bytes rounded value
+                padding = cnt / 8;
+                break;
+            }
+        }
+
+        // Handle NULL only and space-only cases (specific to ASCII85).
+        // A truncated final chunk must be spelled out in full so that it
+        // can be shortened below, hence it is never folded unless padded.
+        if (foldnuls && value == 0 && (pad || !padding)) {
+            *ascii_data++ = 'z';
+        }
+        else if (foldspaces && value == 0x20202020) {
+            *ascii_data++ = 'y';
+        }
+        else {
+            for (int j = 4; j >= 0; j--) {
+                ascii_data[j] = table[value % 85];
+                value /= 85;
+            }
+            ascii_data += 5;
+        }
+    }
+
+    // Drop the characters that only encode the implicit padding
+    if (!pad) {
+        ascii_data -= padding;
+    }
+
+    return _PyBytesWriter_Finish(&writer, ascii_data);
+}
+
 /* List of functions defined in the module */
 
 static struct PyMethodDef binascii_module_methods[] = {
@@ -1246,6 +1343,7 @@ static struct PyMethodDef binascii_module_methods[] = {
     BINASCII_B2A_UU_METHODDEF
     BINASCII_A2B_BASE64_METHODDEF
     BINASCII_B2A_BASE64_METHODDEF
+    BINASCII__B2A_BASE85_METHODDEF
     BINASCII_A2B_HEX_METHODDEF
     BINASCII_B2A_HEX_METHODDEF
     BINASCII_HEXLIFY_METHODDEF
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h
index f81f12c388f373..3e5a22b4ee4433 100644
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -774,4 +774,114 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj
 
     return return_value;
 }
-/*[clinic end generated code: output=9ed7fbeec13c6606 input=a9049054013a1b77]*/
+
+PyDoc_STRVAR(binascii__b2a_base85__doc__,
+"_b2a_base85($module, /, data, chars, pad=False, foldnuls=False,\n"
+"            foldspaces=False)\n"
+"--\n"
+"\n"
+"Utility method used by the base64 module to encode a85/b85 data\n"
+"\n"
+"    data: bytes\n"
+"    chars: 85 bytes conversion table\n"
+"    pad: use NULL-paded input if necessary\n"
+"    foldnuls: replace NULL chunks by \'z\'\n"
+"    foldspaces: replace space-only chucks by \'y\'");
+
+#define BINASCII__B2A_BASE85_METHODDEF    \
+    {"_b2a_base85", _PyCFunction_CAST(binascii__b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii__b2a_base85__doc__},
+
+static PyObject *
+binascii__b2a_base85_impl(PyObject *module, Py_buffer *data,
+                          Py_buffer *chars, int pad, int foldnuls,
+                          int foldspaces);
+
+static PyObject *
+binascii__b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 5
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(data), &_Py_ID(chars), &_Py_ID(pad), &_Py_ID(foldnuls), &_Py_ID(foldspaces), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"data", "chars", "pad", "foldnuls", "foldspaces", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "_b2a_base85",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[5];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2;
+    Py_buffer data = {NULL, NULL};
+    Py_buffer chars = {NULL, NULL};
+    int pad = 0;
+    int foldnuls = 0;
+    int foldspaces = 0;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 2, /*maxpos*/ 5, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (PyObject_GetBuffer(args[1], &chars, PyBUF_SIMPLE) != 0) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    if (args[2]) {
+        pad = PyObject_IsTrue(args[2]);
+        if (pad < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    if (args[3]) {
+        foldnuls = PyObject_IsTrue(args[3]);
+        if (foldnuls < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_pos;
+        }
+    }
+    foldspaces = PyObject_IsTrue(args[4]);
+    if (foldspaces < 0) {
+        goto exit;
+    }
+skip_optional_pos:
+    return_value = binascii__b2a_base85_impl(module, &data, &chars, pad, foldnuls, foldspaces);
+
+exit:
+    /* Cleanup for data */
+    if (data.obj) {
+       PyBuffer_Release(&data);
+    }
+    /* Cleanup for chars */
+    if (chars.obj) {
+       PyBuffer_Release(&chars);
+    }
+
+    return return_value;
+}
+/*[clinic end generated code: output=a1f5ae9968e8e52d input=a9049054013a1b77]*/