Skip to content

Commit 3c8cc13

Browse files
committed
gh-106320: Remove more private _PyUnicode C API functions (#106382)
Remove private _PyUnicode codecs C API functions: move them to the internal C API (pycore_unicodeobject.h). No longer export these functions.
1 parent f6d2bb1 commit 3c8cc13

File tree

3 files changed

+101
-106
lines changed

3 files changed

+101
-106
lines changed

Include/cpython/unicodeobject.h

-106
Original file line numberDiff line numberDiff line change
@@ -461,112 +461,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
461461

462462
#define _PyUnicode_AsString PyUnicode_AsUTF8
463463

464-
/* --- UTF-7 Codecs ------------------------------------------------------- */
465-
466-
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
467-
PyObject *unicode, /* Unicode object */
468-
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
469-
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
470-
const char *errors /* error handling */
471-
);
472-
473-
/* --- UTF-8 Codecs ------------------------------------------------------- */
474-
475-
PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
476-
PyObject *unicode,
477-
const char *errors);
478-
479-
/* --- UTF-32 Codecs ------------------------------------------------------ */
480-
481-
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
482-
PyObject *object, /* Unicode object */
483-
const char *errors, /* error handling */
484-
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
485-
);
486-
487-
/* --- UTF-16 Codecs ------------------------------------------------------ */
488-
489-
/* Returns a Python string object holding the UTF-16 encoded value of
490-
the Unicode data.
491-
492-
If byteorder is not 0, output is written according to the following
493-
byte order:
494-
495-
byteorder == -1: little endian
496-
byteorder == 0: native byte order (writes a BOM mark)
497-
byteorder == 1: big endian
498-
499-
If byteorder is 0, the output string will always start with the
500-
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
501-
prepended.
502-
*/
503-
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
504-
PyObject* unicode, /* Unicode object */
505-
const char *errors, /* error handling */
506-
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
507-
);
508-
509-
/* --- Unicode-Escape Codecs ---------------------------------------------- */
510-
511-
/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
512-
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
513-
const char *string, /* Unicode-Escape encoded string */
514-
Py_ssize_t length, /* size of string */
515-
const char *errors, /* error handling */
516-
Py_ssize_t *consumed /* bytes consumed */
517-
);
518-
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
519-
chars. */
520-
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
521-
const char *string, /* Unicode-Escape encoded string */
522-
Py_ssize_t length, /* size of string */
523-
const char *errors, /* error handling */
524-
Py_ssize_t *consumed, /* bytes consumed */
525-
const char **first_invalid_escape /* on return, points to first
526-
invalid escaped char in
527-
string. */
528-
);
529-
530-
/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
531-
532-
/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
533-
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
534-
const char *string, /* Raw-Unicode-Escape encoded string */
535-
Py_ssize_t length, /* size of string */
536-
const char *errors, /* error handling */
537-
Py_ssize_t *consumed /* bytes consumed */
538-
);
539-
540-
/* --- Latin-1 Codecs ----------------------------------------------------- */
541-
542-
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
543-
PyObject* unicode,
544-
const char* errors);
545-
546-
/* --- ASCII Codecs ------------------------------------------------------- */
547-
548-
PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
549-
PyObject* unicode,
550-
const char* errors);
551-
552-
/* --- Character Map Codecs ----------------------------------------------- */
553-
554-
/* Translate a Unicode object by applying a character mapping table to
555-
it and return the resulting Unicode object.
556-
557-
The mapping table must map Unicode ordinal integers to Unicode strings,
558-
Unicode ordinal integers or None (causing deletion of the character).
559-
560-
Mapping tables may be dictionaries or sequences. Unmapped character
561-
ordinals (ones which cause a LookupError) are left untouched and
562-
are copied as-is.
563-
*/
564-
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
565-
PyObject *unicode, /* Unicode object */
566-
PyObject *mapping, /* encoding mapping */
567-
const char *errors /* error handling */
568-
);
569-
570464
/* --- Decimal Encoder ---------------------------------------------------- */
571465

572466
/* Converts a Unicode object holding a decimal value to an ASCII string

Include/internal/pycore_unicodeobject.h

+100
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,106 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
177177
Py_ssize_t start,
178178
Py_ssize_t end);
179179

180+
/* --- UTF-7 Codecs ------------------------------------------------------- */
181+
182+
extern PyObject* _PyUnicode_EncodeUTF7(
183+
PyObject *unicode, /* Unicode object */
184+
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
185+
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
186+
const char *errors); /* error handling */
187+
188+
/* --- UTF-8 Codecs ------------------------------------------------------- */
189+
190+
PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
191+
PyObject *unicode,
192+
const char *errors);
193+
194+
/* --- UTF-32 Codecs ------------------------------------------------------ */
195+
196+
extern PyObject* _PyUnicode_EncodeUTF32(
197+
PyObject *object, /* Unicode object */
198+
const char *errors, /* error handling */
199+
int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */
200+
201+
/* --- UTF-16 Codecs ------------------------------------------------------ */
202+
203+
/* Returns a Python string object holding the UTF-16 encoded value of
204+
the Unicode data.
205+
206+
If byteorder is not 0, output is written according to the following
207+
byte order:
208+
209+
byteorder == -1: little endian
210+
byteorder == 0: native byte order (writes a BOM mark)
211+
byteorder == 1: big endian
212+
213+
If byteorder is 0, the output string will always start with the
214+
Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
215+
prepended.
216+
*/
217+
extern PyObject* _PyUnicode_EncodeUTF16(
218+
PyObject* unicode, /* Unicode object */
219+
const char *errors, /* error handling */
220+
int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */
221+
222+
/* --- Unicode-Escape Codecs ---------------------------------------------- */
223+
224+
/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
225+
extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
226+
const char *string, /* Unicode-Escape encoded string */
227+
Py_ssize_t length, /* size of string */
228+
const char *errors, /* error handling */
229+
Py_ssize_t *consumed); /* bytes consumed */
230+
231+
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
232+
chars. */
233+
extern PyObject* _PyUnicode_DecodeUnicodeEscapeInternal(
234+
const char *string, /* Unicode-Escape encoded string */
235+
Py_ssize_t length, /* size of string */
236+
const char *errors, /* error handling */
237+
Py_ssize_t *consumed, /* bytes consumed */
238+
const char **first_invalid_escape); /* on return, points to first
239+
invalid escaped char in
240+
string. */
241+
242+
/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
243+
244+
/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
245+
extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful(
246+
const char *string, /* Raw-Unicode-Escape encoded string */
247+
Py_ssize_t length, /* size of string */
248+
const char *errors, /* error handling */
249+
Py_ssize_t *consumed); /* bytes consumed */
250+
251+
/* --- Latin-1 Codecs ----------------------------------------------------- */
252+
253+
extern PyObject* _PyUnicode_AsLatin1String(
254+
PyObject* unicode,
255+
const char* errors);
256+
257+
/* --- ASCII Codecs ------------------------------------------------------- */
258+
259+
extern PyObject* _PyUnicode_AsASCIIString(
260+
PyObject* unicode,
261+
const char* errors);
262+
263+
/* --- Character Map Codecs ----------------------------------------------- */
264+
265+
/* Translate a Unicode object by applying a character mapping table to
266+
it and return the resulting Unicode object.
267+
268+
The mapping table must map Unicode ordinal integers to Unicode strings,
269+
Unicode ordinal integers or None (causing deletion of the character).
270+
271+
Mapping tables may be dictionaries or sequences. Unmapped character
272+
ordinals (ones which cause a LookupError) are left untouched and
273+
are copied as-is.
274+
*/
275+
extern PyObject* _PyUnicode_EncodeCharmap(
276+
PyObject *unicode, /* Unicode object */
277+
PyObject *mapping, /* encoding mapping */
278+
const char *errors); /* error handling */
279+
180280
/* --- Methods & Slots ---------------------------------------------------- */
181281

182282
extern PyObject* _PyUnicode_JoinArray(

Parser/string_parser.c

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <stdbool.h>
22

33
#include <Python.h>
4+
#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
45

56
#include "tokenizer.h"
67
#include "pegen.h"

0 commit comments

Comments
 (0)