Skip to content

Commit 0785c68

Browse files
authored
gh-111972: Make Unicode name C API capsule initialization thread-safe (#112249)
1 parent 81261fa commit 0785c68

File tree

3 files changed

+26
-20
lines changed

3 files changed

+26
-20
lines changed

Include/internal/pycore_ucnhash.h

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ typedef struct {
2828

2929
} _PyUnicode_Name_CAPI;
3030

31+
extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void);
32+
3133
#ifdef __cplusplus
3234
}
3335
#endif

Objects/unicodeobject.c

+21-11
Original file line numberDiff line numberDiff line change
@@ -5869,6 +5869,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
58695869
return _PyUnicode_EncodeUTF16(unicode, NULL, 0);
58705870
}
58715871

5872+
/* Return the Unicode name C API table cached on the current interpreter,
   importing the PyUnicodeData_CAPSULE_NAME capsule the first time it is
   needed. Returns NULL if the capsule import fails; callers must check. */
_PyUnicode_Name_CAPI *
5873+
_PyUnicode_GetNameCAPI(void)
5874+
{
5875+
PyInterpreterState *interp = _PyInterpreterState_GET();
5876+
_PyUnicode_Name_CAPI *ucnhash_capi;
5877+
5878+
ucnhash_capi = _Py_atomic_load_ptr(&interp->unicode.ucnhash_capi);
5879+
if (ucnhash_capi == NULL) {
5880+
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
5881+
PyUnicodeData_CAPSULE_NAME, 1);
5882+
5883+
// It's fine if we overwrite the value here. It's always the same value.
5884+
_Py_atomic_store_ptr(&interp->unicode.ucnhash_capi, ucnhash_capi);
5885+
}
5886+
return ucnhash_capi;
5887+
}
5888+
58725889
/* --- Unicode Escape Codec ----------------------------------------------- */
58735890

58745891
PyObject *
@@ -5884,7 +5901,6 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
58845901
PyObject *errorHandler = NULL;
58855902
PyObject *exc = NULL;
58865903
_PyUnicode_Name_CAPI *ucnhash_capi;
5887-
PyInterpreterState *interp = _PyInterpreterState_GET();
58885904

58895905
// so we can remember if we've seen an invalid escape char or not
58905906
*first_invalid_escape = NULL;
@@ -6032,19 +6048,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
60326048

60336049
/* \N{name} */
60346050
case 'N':
6035-
ucnhash_capi = interp->unicode.ucnhash_capi;
6051+
ucnhash_capi = _PyUnicode_GetNameCAPI();
60366052
if (ucnhash_capi == NULL) {
6037-
/* load the unicode data module */
6038-
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
6039-
PyUnicodeData_CAPSULE_NAME, 1);
6040-
if (ucnhash_capi == NULL) {
6041-
PyErr_SetString(
6053+
PyErr_SetString(
60426054
PyExc_UnicodeError,
60436055
"\\N escapes not supported (can't load unicodedata module)"
6044-
);
6045-
goto onError;
6046-
}
6047-
interp->unicode.ucnhash_capi = ucnhash_capi;
6056+
);
6057+
goto onError;
60486058
}
60496059

60506060
message = "malformed \\N character escape";

Python/codecs.c

+3-9
Original file line numberDiff line numberDiff line change
@@ -931,8 +931,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
931931
return Py_BuildValue("(Nn)", res, end);
932932
}
933933

934-
static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
935-
936934
PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
937935
{
938936
if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
@@ -953,13 +951,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
953951
return NULL;
954952
if (!(object = PyUnicodeEncodeError_GetObject(exc)))
955953
return NULL;
956-
if (!ucnhash_capi) {
957-
/* load the unicode data module */
958-
ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
959-
PyUnicodeData_CAPSULE_NAME, 1);
960-
if (!ucnhash_capi) {
961-
return NULL;
962-
}
954+
_PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI();
955+
if (ucnhash_capi == NULL) {
956+
return NULL;
963957
}
964958
for (i = start, ressize = 0; i < end; ++i) {
965959
/* object is guaranteed to be "ready" */

0 commit comments

Comments
 (0)