Skip to content

Commit 4b24c15

Browse files
committed
Merge branch 'main' into tier-2-tos-caching
2 parents 4fec38d + 4629567 commit 4b24c15

File tree

10 files changed

+192
-61
lines changed

10 files changed

+192
-61
lines changed

Lib/profiling/sampling/_heatmap_assets/heatmap_shared.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,18 @@ function intensityToColor(intensity) {
3838
const rootStyle = getComputedStyle(document.documentElement);
3939
return rootStyle.getPropertyValue(`--heat-${level}`).trim();
4040
}
41+
42+
// ============================================================================
43+
// Favicon (Reuse logo image as favicon)
44+
// ============================================================================
45+
46+
// Immediately-invoked function: runs once when this script is evaluated and
// keeps its locals (`logo`, `favicon`) out of the enclosing scope.
(function() {
47+
// Look up the logo image inside the element with class "brand-logo".
const logo = document.querySelector('.brand-logo img');
48+
// No matching image found: leave the document untouched.
if (logo) {
49+
const favicon = document.createElement('link');
50+
favicon.rel = 'icon';
51+
// The MIME type is set to PNG unconditionally, whatever logo.src points to.
favicon.type = 'image/png';
52+
// Reuse the logo's resolved source URL as the icon location.
favicon.href = logo.src;
53+
document.head.appendChild(favicon);
54+
}
55+
})();
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add ``LDVERSION`` and ``EXE`` to the ``base_interpreter`` value of
2+
``build-details.json``.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Decrease the size of the generated stencils and the runtime JIT code. Patch by Diego Russo.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix reference counting when adjacent literal parts are merged while constructing
2+
:class:`string.templatelib.Template`, preventing the displaced string object
3+
from leaking.

Objects/templateobject.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,14 @@ template_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
148148
if (last_was_str) {
149149
PyObject *laststring = PyTuple_GET_ITEM(strings, stringsidx - 1);
150150
PyObject *concat = PyUnicode_Concat(laststring, item);
151-
Py_DECREF(laststring);
152151
if (!concat) {
153152
Py_DECREF(strings);
154153
Py_DECREF(interpolations);
155154
return NULL;
156155
}
156+
/* Replace laststring with concat */
157157
PyTuple_SET_ITEM(strings, stringsidx - 1, concat);
158+
Py_DECREF(laststring);
158159
}
159160
else {
160161
PyTuple_SET_ITEM(strings, stringsidx++, Py_NewRef(item));

Programs/_testembed.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2063,15 +2063,20 @@ static int check_use_frozen_modules(const char *rawval)
20632063
if (rawval == NULL) {
20642064
wcscpy(optval, L"frozen_modules");
20652065
}
2066-
else if (swprintf(optval, 100,
2067-
#if defined(_MSC_VER)
2068-
L"frozen_modules=%S",
2069-
#else
2070-
L"frozen_modules=%s",
2071-
#endif
2072-
rawval) < 0) {
2073-
error("rawval is too long");
2074-
return -1;
2066+
else {
2067+
wchar_t *val = Py_DecodeLocale(rawval, NULL);
2068+
if (val == NULL) {
2069+
error("unable to decode TESTFROZEN");
2070+
return -1;
2071+
}
2072+
wcscpy(optval, L"frozen_modules=");
2073+
if ((wcslen(optval) + wcslen(val)) >= Py_ARRAY_LENGTH(optval)) {
2074+
error("TESTFROZEN is too long");
2075+
PyMem_RawFree(val);
2076+
return -1;
2077+
}
2078+
wcscat(optval, val);
2079+
PyMem_RawFree(val);
20752080
}
20762081

20772082
PyConfig config;

Python/jit.c

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -134,18 +134,20 @@ mark_executable(unsigned char *memory, size_t size)
134134

135135
// JIT compiler stuff: /////////////////////////////////////////////////////////
136136

137-
#define SYMBOL_MASK_WORDS 4
137+
#define GOT_SLOT_SIZE sizeof(uintptr_t)
138+
#define SYMBOL_MASK_WORDS 8
138139

139140
typedef uint32_t symbol_mask[SYMBOL_MASK_WORDS];
140141

141142
typedef struct {
142143
unsigned char *mem;
143144
symbol_mask mask;
144145
size_t size;
145-
} trampoline_state;
146+
} symbol_state;
146147

147148
typedef struct {
148-
trampoline_state trampolines;
149+
symbol_state trampolines;
150+
symbol_state got_symbols;
149151
uintptr_t instruction_starts[UOP_MAX_TRACE_LENGTH];
150152
} jit_state;
151153

@@ -210,6 +212,33 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
210212
// - x86_64-unknown-linux-gnu:
211213
// - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp
212214

215+
216+
// Get the symbol slot memory location for a given symbol ordinal.
//
// Slots are packed back to back in state->mem, one per bit set in
// state->mask, so the slot index of `ordinal` is the number of set bits
// that precede it in the mask.
//
//   ordinal - symbol ordinal; its bit must already be set in state->mask
//             (checked with assert)
//   state   - mask / memory / size triple describing the slot area
//   size    - size in bytes of one slot, used as the stride into state->mem
//
// Returns a pointer to the first byte of the slot inside state->mem.
217+
static unsigned char *
218+
get_symbol_slot(int ordinal, symbol_state *state, int size)
219+
{
// Single-bit mask selecting this ordinal inside its 32-bit mask word.
220+
const uint32_t symbol_mask = 1U << (ordinal % 32);
// The mask word that holds this ordinal's bit.
221+
const uint32_t state_mask = state->mask[ordinal / 32];
222+
assert(symbol_mask & state_mask);
223+
224+
// Count the number of set bits in the symbol mask lower than ordinal
225+
size_t index = _Py_popcount32(state_mask & (symbol_mask - 1));
// Add the set bits of every complete mask word before the ordinal's word.
226+
for (int i = 0; i < ordinal / 32; i++) {
227+
index += _Py_popcount32(state->mask[i]);
228+
}
229+
230+
unsigned char *slot = state->mem + index * size;
// The whole slot must lie within the state->size bytes of the slot area.
231+
assert((size_t)(index + 1) * size <= state->size);
232+
return slot;
233+
}
234+
235+
// Return the address of the GOT slot for the requested symbol ordinal.
//
// Thin wrapper around get_symbol_slot() that looks the ordinal up in
// state->got_symbols with a stride of GOT_SLOT_SIZE and returns the slot
// pointer converted to uintptr_t.
236+
static uintptr_t
237+
got_symbol_address(int ordinal, jit_state *state)
238+
{
239+
return (uintptr_t)get_symbol_slot(ordinal, &state->got_symbols, GOT_SLOT_SIZE);
240+
}
241+
213242
// Many of these patches are "relaxing", meaning that they can rewrite the
214243
// code they're patching to be more efficient (like turning a 64-bit memory
215244
// load into a 32-bit immediate load). These patches have an "x" in their name.
@@ -452,6 +481,7 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
452481
patch_32r(location, value);
453482
}
454483

484+
void patch_got_symbol(jit_state *state, int ordinal);
455485
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
456486
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
457487

@@ -470,23 +500,13 @@ void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *st
470500
#define DATA_ALIGN 1
471501
#endif
472502

473-
// Get the trampoline memory location for a given symbol ordinal.
474-
static unsigned char *
475-
get_trampoline_slot(int ordinal, jit_state *state)
503+
// Populate the GOT entry for the given symbol ordinal with its resolved address.
504+
void
505+
patch_got_symbol(jit_state *state, int ordinal)
476506
{
477-
const uint32_t symbol_mask = 1 << (ordinal % 32);
478-
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
479-
assert(symbol_mask & trampoline_mask);
480-
481-
// Count the number of set bits in the trampoline mask lower than ordinal
482-
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
483-
for (int i = 0; i < ordinal / 32; i++) {
484-
index += _Py_popcount32(state->trampolines.mask[i]);
485-
}
486-
487-
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
488-
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
489-
return trampoline;
507+
uint64_t value = (uintptr_t)symbols_map[ordinal];
508+
unsigned char *location = (unsigned char *)get_symbol_slot(ordinal, &state->got_symbols, GOT_SLOT_SIZE);
509+
patch_64(location, value);
490510
}
491511

492512
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
@@ -506,8 +526,7 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
506526
}
507527

508528
// Out of range - need a trampoline
509-
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
510-
529+
uint32_t *p = (uint32_t *)get_symbol_slot(ordinal, &state->trampolines, TRAMPOLINE_SIZE);
511530

512531
/* Generate the trampoline
513532
0: 58000048 ldr x8, 8
@@ -537,7 +556,7 @@ patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
537556
}
538557

539558
// Out of range - need a trampoline
540-
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
559+
unsigned char *trampoline = get_symbol_slot(ordinal, &state->trampolines, TRAMPOLINE_SIZE);
541560

542561
/* Generate the trampoline (14 bytes, padded to 16):
543562
0: ff 25 00 00 00 00 jmp *(%rip)
@@ -579,21 +598,26 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
579598
code_size += group->code_size;
580599
data_size += group->data_size;
581600
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
601+
combine_symbol_mask(group->got_mask, state.got_symbols.mask);
582602
}
583603
group = &stencil_groups[_FATAL_ERROR_r00];
584604
code_size += group->code_size;
585605
data_size += group->data_size;
586606
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
607+
combine_symbol_mask(group->got_mask, state.got_symbols.mask);
587608
// Calculate the size of the trampolines required by the whole trace
588609
for (size_t i = 0; i < Py_ARRAY_LENGTH(state.trampolines.mask); i++) {
589610
state.trampolines.size += _Py_popcount32(state.trampolines.mask[i]) * TRAMPOLINE_SIZE;
590611
}
612+
for (size_t i = 0; i < Py_ARRAY_LENGTH(state.got_symbols.mask); i++) {
613+
state.got_symbols.size += _Py_popcount32(state.got_symbols.mask[i]) * GOT_SLOT_SIZE;
614+
}
591615
// Round up to the nearest page:
592616
size_t page_size = get_page_size();
593617
assert((page_size & (page_size - 1)) == 0);
594618
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
595-
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
596-
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
619+
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size) & (page_size - 1));
620+
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size + padding;
597621
unsigned char *memory = jit_alloc(total_size);
598622
if (memory == NULL) {
599623
return -1;
@@ -603,6 +627,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
603627
OPT_STAT_ADD(jit_code_size, code_size);
604628
OPT_STAT_ADD(jit_trampoline_size, state.trampolines.size);
605629
OPT_STAT_ADD(jit_data_size, data_size);
630+
OPT_STAT_ADD(jit_got_size, state.got_symbols.size);
606631
OPT_STAT_ADD(jit_padding_size, padding);
607632
OPT_HIST(total_size, trace_total_memory_hist);
608633
// Update the offsets of each instruction:
@@ -614,6 +639,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
614639
state.trampolines.mem = memory + code_size;
615640
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
616641
assert(trace[0].opcode == _START_EXECUTOR_r00 || trace[0].opcode == _COLD_EXIT_r00 || trace[0].opcode == _COLD_DYNAMIC_EXIT_r00);
642+
state.got_symbols.mem = data + data_size;
617643
for (size_t i = 0; i < length; i++) {
618644
const _PyUOpInstruction *instruction = &trace[i];
619645
group = &stencil_groups[instruction->opcode];
@@ -654,19 +680,21 @@ compile_trampoline(void)
654680
code_size += group->code_size;
655681
data_size += group->data_size;
656682
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
683+
combine_symbol_mask(group->got_mask, state.got_symbols.mask);
657684
// Round up to the nearest page:
658685
size_t page_size = get_page_size();
659686
assert((page_size & (page_size - 1)) == 0);
660687
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
661-
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
662-
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
688+
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size) & (page_size - 1));
689+
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size + padding;
663690
unsigned char *memory = jit_alloc(total_size);
664691
if (memory == NULL) {
665692
return NULL;
666693
}
667694
unsigned char *code = memory;
668695
state.trampolines.mem = memory + code_size;
669696
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
697+
state.got_symbols.mem = data + data_size;
670698
// Compile the shim, which handles converting between the native
671699
// calling convention and the calling convention used by jitted code
672700
// (which may be different for efficiency reasons).

Tools/build/generate-build-details.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ def generate_data(schema_version: str) -> collections.defaultdict[str, Any]:
4848
#data['base_interpreter'] = sys._base_executable
4949
data['base_interpreter'] = os.path.join(
5050
sysconfig.get_path('scripts'),
51-
'python' + sysconfig.get_config_var('VERSION'),
51+
"python"
52+
+ sysconfig.get_config_var('LDVERSION')
53+
+ sysconfig.get_config_var('EXE'),
5254
)
5355
data['platform'] = sysconfig.get_platform()
5456

0 commit comments

Comments
 (0)