Skip to content

Commit 1583c6e

Browse files
authored
GH-100143: Improve collecting pystats for parts of runs (GH-100144)
* pystats off by default
* Add -Xpystats flag
* Always dump pystats, even if turned off
1 parent e4ea33b commit 1583c6e

File tree

4 files changed

+48
-25
lines changed

4 files changed

+48
-25
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
When built with ``--enable-pystats``, stats collection is now off by
2+
default. To enable it early at startup, pass the ``-Xpystats`` flag. Stats
3+
are now always dumped, even if switched off.

Python/initconfig.c

+14-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,14 @@ The following implementation-specific options are available:\n\
129129
\n\
130130
-X int_max_str_digits=number: limit the size of int<->str conversions.\n\
131131
This helps avoid denial of service attacks when parsing untrusted data.\n\
132-
The default is sys.int_info.default_max_str_digits. 0 disables.";
132+
The default is sys.int_info.default_max_str_digits. 0 disables."
133+
134+
#ifdef Py_STATS
135+
"\n\
136+
\n\
137+
-X pystats: Enable pystats collection at startup."
138+
#endif
139+
;
133140

134141
/* Envvars that don't have equivalent command-line options are listed first */
135142
static const char usage_envvars[] =
@@ -2186,6 +2193,12 @@ config_read(PyConfig *config, int compute_path_config)
21862193
config->show_ref_count = 1;
21872194
}
21882195

2196+
#ifdef Py_STATS
2197+
if (config_get_xoption(config, L"pystats")) {
2198+
_py_stats = &_py_stats_struct;
2199+
}
2200+
#endif
2201+
21892202
status = config_read_complex_options(config);
21902203
if (_PyStatus_EXCEPTION(status)) {
21912204
return status;

Python/specialize.c

+2-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#ifdef Py_STATS
2020
PyStats _py_stats_struct = { 0 };
21-
PyStats *_py_stats = &_py_stats_struct;
21+
PyStats *_py_stats = NULL;
2222

2323
#define ADD_STAT_TO_DICT(res, field) \
2424
do { \
@@ -205,9 +205,6 @@ _Py_StatsClear(void)
205205
void
206206
_Py_PrintSpecializationStats(int to_file)
207207
{
208-
if (_py_stats == NULL) {
209-
return;
210-
}
211208
FILE *out = stderr;
212209
if (to_file) {
213210
/* Write to a file instead of stderr. */
@@ -238,7 +235,7 @@ _Py_PrintSpecializationStats(int to_file)
238235
else {
239236
fprintf(out, "Specialization stats:\n");
240237
}
241-
print_stats(out, _py_stats);
238+
print_stats(out, &_py_stats_struct);
242239
if (out != stderr) {
243240
fclose(out);
244241
}

Tools/scripts/summarize_stats.py

+29-19
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@
3434

3535
TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
3636

37+
def format_ratio(num, den):
    """
    Render ``num / den`` as a percentage with one decimal place.

    A denominator of 0 yields the empty string instead of raising
    ZeroDivisionError.
    """
    return f"{num/den:.01%}" if den != 0 else ""
46+
3747
def join_rows(a_rows, b_rows):
3848
"""
3949
Joins two tables together, side-by-side, where the first column in each is a
@@ -87,7 +97,7 @@ def calculate_specialization_stats(family_stats, total):
8797
continue
8898
else:
8999
label = key
90-
rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
100+
rows.append((f"{label:>12}", f"{family_stats[key]:>12}", format_ratio(family_stats[key], total)))
91101
return rows
92102

93103
def calculate_specialization_success_failure(family_stats):
@@ -100,7 +110,7 @@ def calculate_specialization_success_failure(family_stats):
100110
label = key[len("specialization."):]
101111
label = label[0].upper() + label[1:]
102112
val = family_stats.get(key, 0)
103-
rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
113+
rows.append((label, val, format_ratio(val, total_attempts)))
104114
return rows
105115

106116
def calculate_specialization_failure_kinds(name, family_stats, defines):
@@ -118,7 +128,7 @@ def calculate_specialization_failure_kinds(name, family_stats, defines):
118128
for value, index in failures:
119129
if not value:
120130
continue
121-
rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
131+
rows.append((kind_to_text(index, defines, name), value, format_ratio(value, total_failures)))
122132
return rows
123133

124134
def print_specialization_stats(name, family_stats, defines):
@@ -318,11 +328,11 @@ def calculate_execution_counts(opcode_stats, total):
318328
for (count, name, miss) in counts:
319329
cumulative += count
320330
if miss:
321-
miss = f"{100*miss/count:0.1f}%"
331+
miss = format_ratio(miss, count)
322332
else:
323333
miss = ""
324-
rows.append((name, count, f"{100*count/total:0.1f}%",
325-
f"{100*cumulative/total:0.1f}%", miss))
334+
rows.append((name, count, format_ratio(count, total),
335+
format_ratio(cumulative, total), miss))
326336
return rows
327337

328338
def emit_execution_counts(opcode_stats, total):
@@ -386,9 +396,9 @@ def emit_comparative_specialization_stats(base_opcode_stats, head_opcode_stats):
386396
def calculate_specialization_effectiveness(opcode_stats, total):
387397
basic, not_specialized, specialized = categorized_counts(opcode_stats)
388398
return [
389-
("Basic", basic, f"{basic*100/total:0.1f}%"),
390-
("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
391-
("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
399+
("Basic", basic, format_ratio(basic, total)),
400+
("Not specialized", not_specialized, format_ratio(not_specialized, total)),
401+
("Specialized", specialized, format_ratio(specialized, total)),
392402
]
393403

394404
def emit_specialization_overview(opcode_stats, total):
@@ -405,7 +415,7 @@ def emit_specialization_overview(opcode_stats, total):
405415
counts.sort(reverse=True)
406416
if total:
407417
with Section(f"{title} by instruction", 3):
408-
rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ]
418+
rows = [ (name, count, format_ratio(count, total)) for (count, name) in counts[:10] ]
409419
emit_table(("Name", "Count:", "Ratio:"), rows)
410420

411421
def emit_comparative_specialization_overview(base_opcode_stats, base_total, head_opcode_stats, head_total):
@@ -432,15 +442,15 @@ def calculate_call_stats(stats):
432442
rows = []
433443
for key, value in stats.items():
434444
if "Calls to" in key:
435-
rows.append((key, value, f"{100*value/total:0.1f}%"))
445+
rows.append((key, value, format_ratio(value, total)))
436446
elif key.startswith("Calls "):
437447
name, index = key[:-1].split("[")
438448
index = int(index)
439449
label = name + " (" + pretty(defines[index][0]) + ")"
440-
rows.append((label, value, f"{100*value/total:0.1f}%"))
450+
rows.append((label, value, format_ratio(value, total)))
441451
for key, value in stats.items():
442452
if key.startswith("Frame"):
443-
rows.append((key, value, f"{100*value/total:0.1f}%"))
453+
rows.append((key, value, format_ratio(value, total)))
444454
return rows
445455

446456
def emit_call_stats(stats):
@@ -468,13 +478,13 @@ def calculate_object_stats(stats):
468478
for key, value in stats.items():
469479
if key.startswith("Object"):
470480
if "materialize" in key:
471-
ratio = f"{100*value/total_materializations:0.1f}%"
481+
ratio = format_ratio(value, total_materializations)
472482
elif "allocations" in key:
473-
ratio = f"{100*value/total_allocations:0.1f}%"
483+
ratio = format_ratio(value, total_allocations)
474484
elif "increfs" in key:
475-
ratio = f"{100*value/total_increfs:0.1f}%"
485+
ratio = format_ratio(value, total_increfs)
476486
elif "decrefs" in key:
477-
ratio = f"{100*value/total_decrefs:0.1f}%"
487+
ratio = format_ratio(value, total_decrefs)
478488
else:
479489
ratio = ""
480490
label = key[6:].strip()
@@ -517,8 +527,8 @@ def emit_pair_counts(opcode_stats, total):
517527
for (count, pair) in itertools.islice(pair_counts, 100):
518528
i, j = pair
519529
cumulative += count
520-
rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%",
521-
f"{100*cumulative/total:0.1f}%"))
530+
rows.append((opname[i] + " " + opname[j], count, format_ratio(count, total),
531+
format_ratio(cumulative, total)))
522532
emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
523533
rows
524534
)

0 commit comments

Comments
 (0)