Skip to content

Commit 88941d6

Browse files
gh-106581: Fix two bugs in the code generator's copy optimization (#108380)
I was comparing the last preceding poke with the *last* peek, rather than the *first* peek. Unfortunately, this bug obscured another bug: when the last preceding poke is UNUSED, the first peek disappears, leaving the variable unassigned.

This is how I fixed it:

- Rename CopyEffect to CopyItem.
- Change CopyItem to contain StackItems instead of StackEffects.
- Update those StackItems when adjusting the manager higher or lower (adjust_higher / adjust_deeper).
- Assert that those StackItems' offsets are equivalent.
- Other clever things.

---------

Co-authored-by: Irit Katriel <[email protected]>
1 parent c494fb3 commit 88941d6

File tree

2 files changed

+62
-20
lines changed

2 files changed

+62
-20
lines changed

Python/generated_cases.c.h

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Tools/cases_generator/stacking.py

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,27 @@ def as_index(self) -> str:
8383
terms = self.as_terms()
8484
return make_index(terms)
8585

86+
def equivalent_to(self, other: "StackOffset") -> bool:
    """Return True if both offsets hold the same ``deep`` and ``high`` items.

    Each of the two lists is compared as a multiset: the order of the
    entries is irrelevant, but every entry must occur the same number
    of times on both sides.  Entries are matched with ``==`` (via
    ``list.remove``), so they do not need to be hashable.

    Neither ``self`` nor ``other`` is modified; the comparison works on
    a scratch copy of each list.
    """
    for mine, theirs in ((self.deep, other.deep), (self.high, other.high)):
        # Fast path: identical lists (same order) need no further work.
        if mine == theirs:
            continue
        # Cross off one matching entry of ours for every entry of theirs.
        unmatched = list(mine)
        for item in theirs:
            try:
                unmatched.remove(item)
            except ValueError:
                # `other` has an entry we cannot pair up.
                return False
        if unmatched:
            # We have entries that `other` lacks.
            return False
    return True
106+
86107

87108
def make_index(terms: list[tuple[str, str]]) -> str:
88109
# Produce an index expression from the terms honoring PEP 8,
@@ -131,9 +152,9 @@ def as_stack_effect(self, lax: bool = False) -> StackEffect:
131152

132153

133154
@dataclasses.dataclass
134-
class CopyEffect:
135-
src: StackEffect
136-
dst: StackEffect
155+
class CopyItem:
156+
src: StackItem
157+
dst: StackItem
137158

138159

139160
class EffectManager:
@@ -143,7 +164,7 @@ class EffectManager:
143164
active_caches: list[ActiveCacheEffect]
144165
peeks: list[StackItem]
145166
pokes: list[StackItem]
146-
copies: list[CopyEffect] # See merge()
167+
copies: list[CopyItem] # See merge()
147168
# Track offsets from stack pointer
148169
min_offset: StackOffset
149170
final_offset: StackOffset
@@ -179,16 +200,18 @@ def __init__(
179200
while (
180201
pred.pokes
181202
and self.peeks
182-
and pred.pokes[-1].effect == self.peeks[-1].effect
203+
and pred.pokes[-1].effect == self.peeks[0].effect
183204
):
184-
src = pred.pokes.pop(-1).effect
185-
dst = self.peeks.pop(0).effect
186-
pred.final_offset.deeper(src)
187-
if dst.name != UNUSED:
188-
destinations.add(dst.name)
189-
if dst.name != src.name:
190-
sources.add(src.name)
191-
self.copies.append(CopyEffect(src, dst))
205+
src = pred.pokes.pop(-1)
206+
dst = self.peeks.pop(0)
207+
assert src.offset.equivalent_to(dst.offset), (src, dst)
208+
pred.final_offset.deeper(src.effect)
209+
if dst.effect.name != src.effect.name:
210+
if dst.effect.name != UNUSED:
211+
destinations.add(dst.effect.name)
212+
if src.effect.name != UNUSED:
213+
sources.add(src.effect.name)
214+
self.copies.append(CopyItem(src, dst))
192215
# TODO: Turn this into an error (pass an Analyzer instance?)
193216
assert sources & destinations == set(), (
194217
pred.instr.name,
@@ -202,11 +225,27 @@ def __init__(
202225
else:
203226
pred = None # Break
204227

228+
# Fix up patterns of copies through UNUSED,
229+
# e.g. cp(a, UNUSED) + cp(UNUSED, b) -> cp(a, b).
230+
if any(copy.src.effect.name == UNUSED for copy in self.copies):
231+
pred = self.pred
232+
while pred is not None:
233+
for copy in self.copies:
234+
if copy.src.effect.name == UNUSED:
235+
for pred_copy in pred.copies:
236+
if pred_copy.dst == copy.src:
237+
copy.src = pred_copy.src
238+
break
239+
pred = pred.pred
240+
205241
def adjust_deeper(self, eff: StackEffect) -> None:
206242
for peek in self.peeks:
207243
peek.offset.deeper(eff)
208244
for poke in self.pokes:
209245
poke.offset.deeper(eff)
246+
for copy in self.copies:
247+
copy.src.offset.deeper(eff)
248+
copy.dst.offset.deeper(eff)
210249
self.min_offset.deeper(eff)
211250
self.final_offset.deeper(eff)
212251

@@ -215,6 +254,9 @@ def adjust_higher(self, eff: StackEffect) -> None:
215254
peek.offset.higher(eff)
216255
for poke in self.pokes:
217256
poke.offset.higher(eff)
257+
for copy in self.copies:
258+
copy.src.offset.higher(eff)
259+
copy.dst.offset.higher(eff)
218260
self.min_offset.higher(eff)
219261
self.final_offset.higher(eff)
220262

@@ -248,8 +290,8 @@ def add(eff: StackEffect) -> None:
248290
vars[eff.name] = eff
249291

250292
for copy in self.copies:
251-
add(copy.src)
252-
add(copy.dst)
293+
add(copy.src.effect)
294+
add(copy.dst.effect)
253295
for peek in self.peeks:
254296
add(peek.effect)
255297
for poke in self.pokes:
@@ -365,8 +407,11 @@ def write_components(
365407
out.emit(f"// {mgr.instr.name}")
366408

367409
for copy in mgr.copies:
368-
if copy.src.name != copy.dst.name:
369-
out.assign(copy.dst, copy.src)
410+
copy_src_effect = copy.src.effect
411+
if copy_src_effect.name != copy.dst.effect.name:
412+
if copy_src_effect.name == UNUSED:
413+
copy_src_effect = copy.src.as_stack_effect()
414+
out.assign(copy.dst.effect, copy_src_effect)
370415
for peek in mgr.peeks:
371416
out.assign(
372417
peek.effect,

0 commit comments

Comments
 (0)