Skip to content

gh-106320: Remove _PyUnicode_TransformDecimalAndSpaceToASCII() #106398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 0 additions & 37 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,6 @@ typedef struct {
} data; /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;

PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
PyObject *op,
int check_content);


#define _PyASCIIObject_CAST(op) \
(assert(PyUnicode_Check(op)), \
Expand Down Expand Up @@ -461,19 +457,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);

#define _PyUnicode_AsString PyUnicode_AsUTF8

/* --- Decimal Encoder ---------------------------------------------------- */

/* Coverts a Unicode object holding a decimal value to an ASCII string
for using in int, float and complex parsers.
Transforms code points that have decimal digit property to the
corresponding ASCII digit code points. Transforms spaces to ASCII.
Transforms code points starting from the first non-ASCII code point that
is neither a decimal digit nor a space to the end into '?'. */

PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
PyObject *unicode /* Unicode object */
);

/* === Characters Type APIs =============================================== */

/* These should not be used directly. Use the Py_UNICODE_IS* and
Expand Down Expand Up @@ -623,23 +606,3 @@ static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) {
|| Py_UNICODE_ISDIGIT(ch)
|| Py_UNICODE_ISNUMERIC(ch));
}


/* === Misc functions ===================================================== */

PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);

/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);

/* Fast equality check when the inputs are known to be exact unicode types
and where the hash values are equal (i.e. a very probable match) */
PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);

/* Equality check. */
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);

PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);

PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
39 changes: 37 additions & 2 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,12 @@ extern "C" {
#include "pycore_fileutils.h" // _Py_error_handler
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI

void _PyUnicode_ExactDealloc(PyObject *op);
Py_ssize_t _PyUnicode_InternedSize(void);
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
PyObject *op,
int check_content);

extern void _PyUnicode_ExactDealloc(PyObject *op);
extern Py_ssize_t _PyUnicode_InternedSize(void);

/* Get a copy of a Unicode string. */
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
Expand Down Expand Up @@ -277,6 +281,18 @@ extern PyObject* _PyUnicode_EncodeCharmap(
PyObject *mapping, /* encoding mapping */
const char *errors); /* error handling */

/* --- Decimal Encoder ---------------------------------------------------- */

/* Coverts a Unicode object holding a decimal value to an ASCII string
for using in int, float and complex parsers.
Transforms code points that have decimal digit property to the
corresponding ASCII digit code points. Transforms spaces to ASCII.
Transforms code points starting from the first non-ASCII code point that
is neither a decimal digit nor a space to the end into '?'. */

PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
PyObject *unicode); /* Unicode object */

/* --- Methods & Slots ---------------------------------------------------- */

extern PyObject* _PyUnicode_JoinArray(
Expand Down Expand Up @@ -323,6 +339,25 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
PyObject *thousands_sep,
Py_UCS4 *maxchar);

/* --- Misc functions ----------------------------------------------------- */

extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);

/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);

/* Fast equality check when the inputs are known to be exact unicode types
and where the hash values are equal (i.e. a very probable match) */
extern int _PyUnicode_EQ(PyObject *, PyObject *);

/* Equality check. */
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);

extern int _PyUnicode_WideCharString_Converter(PyObject *, void *);
extern int _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);

PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);

/* --- Runtime lifecycle -------------------------------------------------- */

extern void _PyUnicode_InitState(PyInterpreterState *);
Expand Down
8 changes: 6 additions & 2 deletions Lib/test/test_capi/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import _testcapi
except ImportError:
_testcapi = None
try:
import _testinternalcapi
except ImportError:
_testinternalcapi = None


NULL = None
Expand Down Expand Up @@ -913,10 +917,10 @@ def test_getdefaultencoding(self):
self.assertEqual(getdefaultencoding(), b'utf-8')

@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
@unittest.skipIf(_testinternalcapi is None, 'need _testinternalcapi module')
def test_transform_decimal_and_space(self):
"""Test _PyUnicode_TransformDecimalAndSpaceToASCII()"""
from _testcapi import unicode_transformdecimalandspacetoascii as transform_decimal
from _testinternalcapi import _PyUnicode_TransformDecimalAndSpaceToASCII as transform_decimal

self.assertEqual(transform_decimal('123'),
'123')
Expand Down
9 changes: 0 additions & 9 deletions Modules/_testcapi/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -660,14 +660,6 @@ unicode_getdefaultencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
return PyBytes_FromString(s);
}

/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */
static PyObject *
unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg)
{
NULLABLE(arg);
return _PyUnicode_TransformDecimalAndSpaceToASCII(arg);
}

/* Test PyUnicode_DecodeUTF8() */
static PyObject *
unicode_decodeutf8(PyObject *self, PyObject *args)
Expand Down Expand Up @@ -1544,7 +1536,6 @@ static PyMethodDef TestMethods[] = {
{"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS},
{"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS},
{"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS},
{"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O},
{"unicode_concat", unicode_concat, METH_VARARGS},
{"unicode_splitlines", unicode_splitlines, METH_VARARGS},
{"unicode_split", unicode_split, METH_VARARGS},
Expand Down
12 changes: 12 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,17 @@ test_tstate_capi(PyObject *self, PyObject *Py_UNUSED(args))
}


/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */
static PyObject *
unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg)
{
if (arg == Py_None) {
arg = NULL;
}
return _PyUnicode_TransformDecimalAndSpaceToASCII(arg);
}


static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
Expand Down Expand Up @@ -1304,6 +1315,7 @@ static PyMethodDef module_functions[] = {
{"_PyTime_ObjectToTimeval", test_pytime_object_to_timeval, METH_VARARGS},
{"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS},
{"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL},
{"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O},
{NULL, NULL} /* sentinel */
};

Expand Down
1 change: 1 addition & 0 deletions Python/pystrhex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "Python.h"
#include "pycore_strhex.h" // _Py_strhex_with_sep()
#include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency()
#include <stdlib.h> // abs()

static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
Expand Down