From dd606ae1965e320a80e95dea46ca4fbb8b18284b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 28 Sep 2023 16:27:11 +0100 Subject: [PATCH 1/4] Add valid flag to executor objects --- Include/cpython/optimizer.h | 1 + Python/optimizer.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 47536108a9665e..c1ff7ae1afbdb9 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -16,6 +16,7 @@ typedef struct _PyExecutorObject { /* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */ struct _PyInterpreterFrame *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer); _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + bool valid; /* This should be set to true by the optimizer */ /* Data needed by the executor goes here, but is opaque to the VM */ } _PyExecutorObject; diff --git a/Python/optimizer.c b/Python/optimizer.c index fbdbf7291784c4..3d2d91774ce036 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -261,6 +261,7 @@ counter_optimize( executor->optimizer = (_PyCounterOptimizerObject *)self; executor->next_instr = instr; *exec_ptr = (_PyExecutorObject *)executor; + executor->executor.valid = true; return 1; } @@ -904,6 +905,7 @@ uop_optimize( executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); *exec_ptr = (_PyExecutorObject *)executor; + executor->base.valid = true; return 1; } From 45c30d369ea9c96e33aa3d288a34782f662094f0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 1 Oct 2023 17:40:14 +0100 Subject: [PATCH 2/4] Add ability to invalidate executors as a result of changes to other objects. --- Include/cpython/code.h | 3 +- Include/cpython/optimizer.h | 20 ++- Include/internal/pycore_interp.h | 1 + Include/internal/pycore_opcode_metadata.h | 7 + Lib/test/test_capi/test_misc.py | 44 +++++ Modules/_testinternalcapi.c | 28 ++++ Objects/codeobject.c | 2 + Python/abstract_interp_cases.c.h | 4 + Python/bytecodes.c | 6 + Python/executor_cases.c.h | 7 + Python/instrumentation.c | 1 + Python/optimizer.c | 191 +++++++++++++++++++++- Python/pystate.c | 1 + 13 files changed, 311 insertions(+), 4 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 45b09a1265df80..25d12b33a65c1a 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -280,7 +280,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i #define PY_FOREACH_CODE_EVENT(V) \ V(CREATE) \ - V(DESTROY) + V(DESTROY) \ + V(INSTRUMENT) typedef enum { #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index c1ff7ae1afbdb9..f5b0a4169cad17 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -6,9 +6,23 @@ extern "C" { #endif +typedef struct _PyExecutorLinkListNode { + struct _PyExecutorObject *next; + struct _PyExecutorObject *previous; +} _PyExecutorLinkListNode; + +/* Bloom filter with k = 6, m = 256 */ +typedef struct _bloom_filter { + uint32_t bits[8]; +} _PyBloomFilter; + typedef struct { uint8_t opcode; uint8_t oparg; + uint8_t valid; + uint8_t linked; + _PyBloomFilter bloom; + _PyExecutorLinkListNode links; } _PyVMData; typedef struct _PyExecutorObject { @@ -16,7 +30,6 @@ typedef struct _PyExecutorObject { /* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */ struct _PyInterpreterFrame *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer); _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ - bool valid; /* This should be set to true by the optimizer */ /* Data needed by the executor goes here, but is opaque to the VM */ } _PyExecutorObject; @@ -46,6 +59,11 @@ _PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_ extern _PyOptimizerObject _PyOptimizer_Default; +void _Py_ExecutorInit(_PyExecutorObject *); +void _Py_ExecutorClear(_PyExecutorObject *); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); + /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0912bd175fe4f7..2a8a8d753e6235 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -186,6 +186,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; _PyOptimizerObject *optimizer; + _PyExecutorObject *executor_list_head; uint16_t optimizer_resume_threshold; uint16_t optimizer_backedge_threshold; uint32_t next_func_version; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 16c1637e496033..cce460c60ab861 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -80,6 +80,7 @@ #define _JUMP_TO_TOP 352 #define _SAVE_CURRENT_IP 353 #define _INSERT 354 +#define _JUMP_IF_INVALID 355 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -633,6 +634,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; + case _JUMP_IF_INVALID: + return 0; default: return -1; } @@ -1191,6 +1194,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case _INSERT: return oparg + 1; + case _JUMP_IF_INVALID: + return 0; default: return -1; } @@ -1538,6 +1543,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_SAVE_CURRENT_IP] = { true, INSTR_FMT_IX, 0 }, [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [_JUMP_IF_INVALID] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, }; #endif // NEED_OPCODE_METADATA @@ -1745,6 +1751,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_JUMP_TO_TOP] = "_JUMP_TO_TOP", [_SAVE_CURRENT_IP] = "_SAVE_CURRENT_IP", [_INSERT] = "_INSERT", + [_JUMP_IF_INVALID] = "_JUMP_IF_INVALID", }; #endif // NEED_OPCODE_METADATA diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5ece213e7b2363..cdb251e7a64c10 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2387,6 +2387,50 @@ def testfunc(x): self.assertEqual(code, replace_code) self.assertEqual(hash(code), hash(replace_code)) +class TestExecutorInvalidation(unittest.TestCase): + + def setUp(self): + self.old = _testinternalcapi.get_optimizer() + self.opt = _testinternalcapi.get_counter_optimizer() + _testinternalcapi.set_optimizer(self.opt) + + def tearDown(self): + _testinternalcapi.set_optimizer(self.old) + + def test_invalidate_object(self): + # Generate a new set of functions at each call + ns = {} + func_src = "\n".join( + f""" + def f{n}(): + for _ in range(1000): + pass + """ for n in range(5) + ) + exec(textwrap.dedent(func_src), ns, ns) + funcs = [ ns[f'f{n}'] for n in range(5)] + objects = [object() for _ in range(5)] + + for f in funcs: + f() + executors = [ get_first_executor(f) for f in funcs] + # Set things up so each executor depends on the objects + # with an equal or lower index. + for i, exe in enumerate(executors): + self.assertTrue(exe.valid) + for obj in objects[:i+1]: + _testinternalcapi.add_executor_dependency(exe, obj) + self.assertTrue(exe.valid) + # Assert that the correct executors are invalidated + # and check that nothing crashes when we invalidate + # an executor mutliple times. + for i in (4,3,2,1,0): + _testinternalcapi.invalidate_executors(objects[i]) + for exe in executors[i:]: + self.assertFalse(exe.valid) + for exe in executors[:i]: + self.assertTrue(exe.valid) + def get_first_executor(func): code = func.__code__ diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c6b80fffdec16d..99c7533ce2008d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -998,6 +998,32 @@ get_executor(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, ioffset); } +static PyObject * +add_executor_dependency(PyObject *self, PyObject *args) +{ + PyObject *exec; + PyObject *obj; + if (!PyArg_ParseTuple(args, "OO", &exec, &obj)) { + return NULL; + } + /* No way to tell in general if exec is an executor, so we only accept + * counting_executor */ + if (strcmp(Py_TYPE(exec)->tp_name, "counting_executor")) { + PyErr_SetString(PyExc_TypeError, "argument must be a counting_executor"); + return NULL; + } + _Py_Executor_DependsOn((_PyExecutorObject *)exec, obj); + Py_RETURN_NONE; +} + +static PyObject * +invalidate_executors(PyObject *self, PyObject *obj) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + _Py_Executors_InvalidateDependency(interp, obj); + Py_RETURN_NONE; +} + static int _pending_callback(void *arg) { /* we assume the argument is callable object to which we own a reference */ @@ -1561,6 +1587,8 @@ static PyMethodDef module_functions[] = { {"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL}, {"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL}, {"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL}, + {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, + {"invalidate_executors", invalidate_executors, METH_O, NULL}, {"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc), METH_VARARGS | METH_KEYWORDS}, {"pending_identify", pending_identify, METH_VARARGS, NULL}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index f662b8e354bb1e..9e5e84b2d8d2f0 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -63,6 +63,8 @@ notify_code_watchers(PyCodeEvent event, PyCodeObject *co) } } + + int PyCode_AddWatcher(PyCode_WatchCallback callback) { diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 61b1db9e5a1543..26a4c2c939d2cc 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -880,3 +880,7 @@ PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); break; } + + case _JUMP_IF_INVALID: { + break; + } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0f89779fb9245f..44cc3acbafef65 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3957,6 +3957,12 @@ dummy_func( memmove(&stack_pointer[-1 - oparg], &stack_pointer[-oparg], oparg * sizeof(stack_pointer[0])); } + op(_JUMP_IF_INVALID, (--)) { + if (!self->base.vm_data.valid) { + pc = oparg; + } + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 981db6973f281a..3d0d0eb2827dcc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3159,3 +3159,10 @@ stack_pointer[-1 - oparg] = top; break; } + + case _JUMP_IF_INVALID: { + if (!self->base.vm_data.valid) { + pc = oparg; + } + break; + } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 0b974f6133ce7d..3fa544ab44410a 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1564,6 +1564,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) if (code->co_executors != NULL) { _PyCode_Clear_Executors(code); } + _Py_Executors_InvalidateDependency(interp, code); int code_len = (int)Py_SIZE(code); /* code->_co_firsttraceable >= code_len indicates * that no instrumentation can be inserted. diff --git a/Python/optimizer.c b/Python/optimizer.c index 3d2d91774ce036..6eecc9f99e0d54 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -220,10 +220,16 @@ typedef struct { static void counter_dealloc(_PyCounterExecutorObject *self) { + _Py_ExecutorClear((_PyExecutorObject *)self); Py_DECREF(self->optimizer); PyObject_Free(self); } +static PyMemberDef counter_members[] = { + { "valid", Py_T_UBYTE, offsetof(_PyCounterExecutorObject, executor.vm_data.valid), Py_READONLY, "is valid?" }, + { NULL } +}; + static PyTypeObject CounterExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "counting_executor", @@ -231,6 +237,7 @@ static PyTypeObject CounterExecutor_Type = { .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)counter_dealloc, + .tp_members = counter_members, }; static _PyInterpreterFrame * @@ -261,7 +268,7 @@ counter_optimize( executor->optimizer = (_PyCounterOptimizerObject *)self; executor->next_instr = instr; *exec_ptr = (_PyExecutorObject *)executor; - executor->executor.valid = true; + _Py_ExecutorInit((_PyExecutorObject *)executor); return 1; } @@ -289,6 +296,8 @@ static PyTypeObject CounterOptimizer_Type = { PyObject * PyUnstable_Optimizer_NewCounter(void) { + PyType_Ready(&CounterExecutor_Type); + PyType_Ready(&CounterOptimizer_Type); _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&CounterOptimizer_Type); if (opt == NULL) { return NULL; @@ -304,6 +313,7 @@ PyUnstable_Optimizer_NewCounter(void) static void uop_dealloc(_PyUOpExecutorObject *self) { + _Py_ExecutorClear((_PyExecutorObject *)self); PyObject_Free(self); } @@ -904,8 +914,8 @@ uop_optimize( } executor->base.execute = _PyUopExecute; memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); + _Py_ExecutorInit((_PyExecutorObject *)executor); *exec_ptr = (_PyExecutorObject *)executor; - executor->base.valid = true; return 1; } @@ -937,3 +947,180 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) opt->backedge_threshold = 16 << OPTIMIZER_BITS_IN_COUNTER; return (PyObject *)opt; } + + +/***************************************** + * Executor management + ****************************************/ + +static uint16_t +address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { + assert(ptr != NULL); + uint16_t uhash = seed; + uintptr_t addr = (uintptr_t)ptr; + for (int i = 0; i < SIZEOF_VOID_P; i++) { + uhash ^= addr & 255; + uhash *= multiplier; + addr >>= 8; + } + return uhash; +} + +static const uint32_t multipliers[3] = { + _PyHASH_MULTIPLIER, + 110351524, + 48271 +}; + +static const uint32_t seeds[3] = { + 20221211, + 2147483647, + 1051, +}; + +static void +address_to_hashes(void *ptr, _PyBloomFilter *bloom) +{ + for (int i = 0; i < 8; i++) { + bloom->bits[i] = 0; + } + /* Set k (6) bits */ + for (int i = 0; i < 3; i++) { + uint16_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); + int bit0 = hash & 255; + int bit1 = hash >> 8; + bloom->bits[bit0 >> 5] |= (1 << (bit0&31)); + bloom->bits[bit1 >> 5] |= (1 << (bit1&31)); + } +} + +static void +add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) +{ + _PyBloomFilter hashes; + address_to_hashes(obj, &hashes); + for (int i = 0; i < 8; i++) { + bloom->bits[i] |= hashes.bits[i]; + } +} + +static bool +bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes) +{ + for (int i = 0; i < 8; i++) { + if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) { + return false; + } + } + return true; +} + +static void +link_executor(_PyExecutorObject *executor) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyExecutorLinkListNode *links = &executor->vm_data.links; + _PyExecutorObject *head = interp->executor_list_head; + if (head == NULL) { + interp->executor_list_head = executor; + links->previous = NULL; + links->next = NULL; + } + else { + _PyExecutorObject *next = head->vm_data.links.next; + links->previous = head; + links->next = next; + if (next != NULL) { + next->vm_data.links.previous = executor; + } + head->vm_data.links.next = executor; + } + executor->vm_data.linked = true; +} + +static void +unlink_executor(_PyExecutorObject *executor) +{ + if (!executor->vm_data.linked) { + return; + } + _PyExecutorLinkListNode *links = &executor->vm_data.links; + _PyExecutorObject *next = links->next; + _PyExecutorObject *prev = links->previous; + if (next != NULL) { + next->vm_data.links.previous = prev; + } + else if (prev == NULL) { + // Both are NULL, so this must be the list head. + PyInterpreterState *interp = PyInterpreterState_Get(); + assert(interp->executor_list_head == executor); + interp->executor_list_head = executor->vm_data.links.next; + } + if (prev != NULL) { + prev->vm_data.links.next = next; + } + executor->vm_data.linked = false; +} + +/* This must be called by optimizers before using the executor */ +void +_Py_ExecutorInit(_PyExecutorObject *executor) +{ + executor->vm_data.valid = true; + for (int i = 0; i < 8; i++) { + executor->vm_data.bloom.bits[i] = 0; + } + link_executor(executor); +} + +/* This must be called by executors during dealloc */ +void +_Py_ExecutorClear(_PyExecutorObject *executor) +{ + unlink_executor(executor); +} + +void +_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) +{ + assert(executor->vm_data.valid = true); + add_to_bloom_filter(&executor->vm_data.bloom, obj); +} + +/* Invalidate all executors that depend on `obj` + * May cause other executors to be invalidated as well + */ +void +_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) +{ + _PyBloomFilter hashes; + address_to_hashes(obj, &hashes); + /* Walk the list of executors */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + if (bloom_filter_may_contain(&exec->vm_data.bloom, &hashes)) { + exec->vm_data.valid = false; + unlink_executor(exec); + } + exec = next; + } +} + +/* Invalidate all executors + */ +void +_Py_Executors_InvalidateAll(PyInterpreterState *interp) +{ + /* Walk the list of executors */ + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + assert(exec->vm_data.valid); + _PyExecutorObject *next = exec->vm_data.links.next; + exec->vm_data.links.next = NULL; + exec->vm_data.links.previous = NULL; + exec->vm_data.valid = false; + exec->vm_data.linked = false; + exec = next; + } + interp->executor_list_head = NULL; +} diff --git a/Python/pystate.c b/Python/pystate.c index 570b5242600c0c..4a887216d1a4d7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -708,6 +708,7 @@ init_interpreter(PyInterpreterState *interp, interp->optimizer_backedge_threshold = _PyOptimizer_Default.backedge_threshold; interp->optimizer_resume_threshold = _PyOptimizer_Default.backedge_threshold; interp->next_func_version = 1; + interp->executor_list_head = NULL; if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp); From 79bda49a5bc2534bed6b2fd7e59161ebcddb6f12 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Oct 2023 12:33:57 +0100 Subject: [PATCH 3/4] Remove unrelated change --- Include/cpython/code.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 25d12b33a65c1a..45b09a1265df80 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -280,8 +280,7 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i #define PY_FOREACH_CODE_EVENT(V) \ V(CREATE) \ - V(DESTROY) \ - V(INSTRUMENT) + V(DESTROY) typedef enum { #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, From 76dad942cdfaf3baa224048517dc56053e4405e0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Oct 2023 14:24:28 +0100 Subject: [PATCH 4/4] Tidier logic and add explanatory comment --- Include/cpython/optimizer.h | 6 ++-- Python/optimizer.c | 60 +++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index f5b0a4169cad17..505e5476895e80 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -11,9 +11,11 @@ typedef struct _PyExecutorLinkListNode { struct _PyExecutorObject *previous; } _PyExecutorLinkListNode; -/* Bloom filter with k = 6, m = 256 */ +/* Bloom filter with m = 256 */ +#define BLOOM_FILTER_WORDS 8 + typedef struct _bloom_filter { - uint32_t bits[8]; + uint32_t bits[BLOOM_FILTER_WORDS]; } _PyBloomFilter; typedef struct { diff --git a/Python/optimizer.c b/Python/optimizer.c index 6eecc9f99e0d54..a095d8d341ad12 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -953,10 +953,38 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) * Executor management ****************************************/ -static uint16_t +/* We use a bloomfilter with k = 6, m = 256 + * The choice of k and the following constants + * could do with a more rigourous analysis, + * but here is a simple analysis: + * + * We want to keep the false positive rate low. + * For n = 5 (a trace depends on 5 objects), + * we expect 30 bits set, giving a false positive + * rate of (30/256)**6 == 2.5e-6 which is plenty + * good enough. + * + * However with n = 10 we expect 60 bits set (worst case), + * giving a false positive of (60/256)**6 == 0.0001 + * + * We choose k = 6, rather than a higher number as + * it means the false positive rate goes slower for high n. + * + * n = 15, k = 6 => fp = 0.18% + * n = 15, k = 8 => fp = 0.23% + * n = 20, k = 6 => fp = 1.1% + * n = 20, k = 8 => fp = 2.3% + * + * The above analysis assumes perfect hash functions, + * but we don't have those, so it is approximate. + */ + +#define K 6 + +static uint32_t address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { assert(ptr != NULL); - uint16_t uhash = seed; + uint32_t uhash = seed; uintptr_t addr = (uintptr_t)ptr; for (int i = 0; i < SIZEOF_VOID_P; i++) { uhash ^= addr & 255; @@ -966,31 +994,30 @@ address_to_hash(void *ptr, uint32_t seed, uint32_t multiplier) { return uhash; } -static const uint32_t multipliers[3] = { +static const uint32_t multipliers[2] = { _PyHASH_MULTIPLIER, 110351524, - 48271 }; -static const uint32_t seeds[3] = { +static const uint32_t seeds[2] = { 20221211, 2147483647, - 1051, }; static void address_to_hashes(void *ptr, _PyBloomFilter *bloom) { - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { bloom->bits[i] = 0; } /* Set k (6) bits */ - for (int i = 0; i < 3; i++) { - uint16_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); - int bit0 = hash & 255; - int bit1 = hash >> 8; - bloom->bits[bit0 >> 5] |= (1 << (bit0&31)); - bloom->bits[bit1 >> 5] |= (1 << (bit1&31)); + for (int i = 0; i < 2; i++) { + uint32_t hash = address_to_hash(ptr, seeds[i], multipliers[i]); + for (int j = 0; j < (K/2); j++) { + uint8_t bits = hash & 255; + bloom->bits[bits >> 5] |= (1 << (bits&31)); + hash >>= 8; + } } } @@ -999,7 +1026,7 @@ add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) { _PyBloomFilter hashes; address_to_hashes(obj, &hashes); - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { bloom->bits[i] |= hashes.bits[i]; } } @@ -1007,7 +1034,7 @@ add_to_bloom_filter(_PyBloomFilter *bloom, PyObject *obj) static bool bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes) { - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) { return false; } @@ -1067,7 +1094,7 @@ void _Py_ExecutorInit(_PyExecutorObject *executor) { executor->vm_data.valid = true; - for (int i = 0; i < 8; i++) { + for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { executor->vm_data.bloom.bits[i] = 0; } link_executor(executor); @@ -1096,6 +1123,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) _PyBloomFilter hashes; address_to_hashes(obj, &hashes); /* Walk the list of executors */ + /* TO DO -- Use a tree to avoid traversing as many objects */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next;