Skip to content

Commit d0dc245

Browse files
encukou and picnixz committed
pythongh-133157: remove usage of _Py_NO_SANITIZE_UNDEFINED in pyexpat
Co-Authored-By: Bénédikt Tran <[email protected]>
1 parent 13cb8ca commit d0dc245

File tree

1 file changed

+78
-6
lines changed

1 file changed

+78
-6
lines changed

Modules/pyexpat.c

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,11 @@ typedef struct {
9898

9999
#define CHARACTER_DATA_BUFFER_SIZE 8192
100100

101-
typedef const void *xmlhandler;
101+
// A generic function type for storage.
102+
// To avoid undefined behavior, a handler needs to be cast to the correct
103+
// function type before it's called; see SETTER_WRAPPER below.
104+
typedef void (*xmlhandler)(void);
105+
102106
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
103107

104108
struct HandlerInfo {
@@ -110,9 +114,7 @@ struct HandlerInfo {
110114

111115
static struct HandlerInfo handler_info[64];
112116

113-
// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
114-
// handler API for each handler.
115-
static inline void _Py_NO_SANITIZE_UNDEFINED
117+
static inline void
116118
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
117119
XML_Parser xml_parser, xmlhandler xml_handler)
118120
{
@@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
13651367
elaborate system of handlers and state could remove the
13661368
C handler more effectively. */
13671369
if (handlernum == CharacterData && self->in_callback) {
1368-
c_handler = noop_character_data_handler;
1370+
c_handler = (xmlhandler)noop_character_data_handler;
13691371
}
13701372
v = NULL;
13711373
}
@@ -2222,13 +2224,83 @@ clear_handlers(xmlparseobject *self, int initial)
22222224
}
22232225
}
22242226

2227+
/* To avoid undefined behavior, a function needs to be *called* via a function
2228+
* pointer of the correct type.
2229+
* So, for each `XML_Set*` function, we define a wrapper that calls the
2230+
* `XML_Set*` with its argument cast to the appropriate type.
2231+
*/
2232+
2233+
typedef void (*parser_only)(void *);
2234+
typedef int (*not_standalone)(void *);
2235+
typedef void (*parser_and_data)(void *, const XML_Char *);
2236+
typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int);
2237+
typedef void (*parser_and_data_and_data)(
2238+
void *, const XML_Char *, const XML_Char *);
2239+
typedef void (*start_element)(void *, const XML_Char *, const XML_Char **);
2240+
typedef void (*element_decl)(void *, const XML_Char *, XML_Content *);
2241+
typedef void (*xml_decl)(
2242+
void *, const XML_Char *, const XML_Char *, int);
2243+
typedef void (*start_doctype_decl)(
2244+
void *, const XML_Char *, const XML_Char *, const XML_Char *, int);
2245+
typedef void (*notation_decl)(
2246+
void *,
2247+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
2248+
typedef void (*attlist_decl)(
2249+
void *,
2250+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
2251+
int);
2252+
typedef void (*unparsed_entity_decl)(
2253+
void *,
2254+
const XML_Char *, const XML_Char *,
2255+
const XML_Char *, const XML_Char *, const XML_Char *);
2256+
typedef void (*entity_decl)(
2257+
void *,
2258+
const XML_Char *, int,
2259+
const XML_Char *, int,
2260+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
2261+
typedef int (*external_entity_ref)(
2262+
XML_Parser,
2263+
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
2264+
2265+
#define SETTER_WRAPPER(NAME, TYPE) \
2266+
static inline void \
2267+
my_Set ## NAME (XML_Parser parser, xmlhandler handler) \
2268+
{ \
2269+
(void)XML_Set ## NAME (parser, (TYPE)handler); \
2270+
}
2271+
2272+
SETTER_WRAPPER(StartElementHandler, start_element)
2273+
SETTER_WRAPPER(EndElementHandler, parser_and_data)
2274+
SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
2275+
SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
2276+
SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
2277+
SETTER_WRAPPER(NotationDeclHandler, notation_decl)
2278+
SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
2279+
SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
2280+
SETTER_WRAPPER(CommentHandler, parser_and_data)
2281+
SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
2282+
SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
2283+
SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
2284+
SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
2285+
SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
2286+
SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
2287+
SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
2288+
SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
2289+
SETTER_WRAPPER(EntityDeclHandler, entity_decl)
2290+
SETTER_WRAPPER(XmlDeclHandler, xml_decl)
2291+
SETTER_WRAPPER(ElementDeclHandler, element_decl)
2292+
SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
2293+
#if XML_COMBINED_VERSION >= 19504
2294+
SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
2295+
#endif
2296+
22252297
static struct HandlerInfo handler_info[] = {
22262298

22272299
// The `my_Set*` wrappers already have the exact `xmlhandlersetter`
22282300
// signature, so that cast is redundant; the `(xmlhandler)` cast converts
22292301
// each `my_*` callback to the generic function-pointer storage type.
22302302
#define HANDLER_INFO(name) \
2231-
{#name, (xmlhandlersetter)XML_Set##name, my_##name},
2303+
{#name, (xmlhandlersetter)my_Set##name, (xmlhandler)my_##name},
22322304

22332305
HANDLER_INFO(StartElementHandler)
22342306
HANDLER_INFO(EndElementHandler)

0 commit comments

Comments
 (0)