Skip to content

Commit c756078

Browse files
authored
Merge pull request #1586 from bnjbvr/address-sp-add
aarch64: emit SP copies when SP is involved in an explicit add during address lowering
2 parents d1be0c1 + 19b5b0c commit c756078

File tree

6 files changed

+139
-31
lines changed

6 files changed

+139
-31
lines changed

build.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -237,11 +237,7 @@ fn should_panic(testsuite: &str, testname: &str) -> bool {
237237
}
238238
match (testsuite, testname) {
239239
// FIXME(#1521)
240-
("misc_testsuite", "func_400_params")
241-
| ("simd", _)
242-
| ("multi_value", "call")
243-
| ("spec_testsuite", "call") => true,
244-
240+
("simd", _) => true,
245241
_ => false,
246242
}
247243
}

cranelift/codegen/src/isa/aarch64/abi.rs

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ fn load_stack_from_fp(fp_offset: i64, into_reg: Writable<Reg>, ty: Type) -> Inst
295295
}
296296

297297
fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
298+
debug_assert!(match &mem {
299+
MemArg::SPOffset(off) => SImm9::maybe_from_i64(*off).is_some(),
300+
_ => true,
301+
});
298302
match ty {
299303
types::B1
300304
| types::B8
@@ -323,6 +327,50 @@ fn store_stack(mem: MemArg, from_reg: Reg, ty: Type) -> Inst {
323327
}
324328
}
325329

330+
fn store_stack_fp(fp_offset: i64, from_reg: Reg, ty: Type) -> Inst {
331+
store_stack(MemArg::FPOffset(fp_offset), from_reg, ty)
332+
}
333+
334+
fn store_stack_sp<C: LowerCtx<I = Inst>>(
335+
ctx: &mut C,
336+
sp_offset: i64,
337+
from_reg: Reg,
338+
ty: Type,
339+
) -> Vec<Inst> {
340+
if SImm9::maybe_from_i64(sp_offset).is_some() {
341+
vec![store_stack(MemArg::SPOffset(sp_offset), from_reg, ty)]
342+
} else {
343+
// mem_finalize will try to generate an add, but in an addition, x31 is the zero register,
344+
// not sp! So we have to synthesize the full add here.
345+
let tmp1 = ctx.tmp(RegClass::I64, I64);
346+
let tmp2 = ctx.tmp(RegClass::I64, I64);
347+
let mut result = Vec::new();
348+
// tmp1 := sp
349+
result.push(Inst::Mov {
350+
rd: tmp1,
351+
rm: stack_reg(),
352+
});
353+
// tmp2 := offset
354+
for inst in Inst::load_constant(tmp2, sp_offset as u64) {
355+
result.push(inst);
356+
}
357+
// tmp1 := add tmp1, tmp2
358+
result.push(Inst::AluRRR {
359+
alu_op: ALUOp::Add64,
360+
rd: tmp1,
361+
rn: tmp1.to_reg(),
362+
rm: tmp2.to_reg(),
363+
});
364+
// Actual store.
365+
result.push(store_stack(
366+
MemArg::Unscaled(tmp1.to_reg(), SImm9::maybe_from_i64(0).unwrap()),
367+
from_reg,
368+
ty,
369+
));
370+
result
371+
}
372+
}
373+
326374
fn is_callee_save(call_conv: isa::CallConv, r: RealReg) -> bool {
327375
if call_conv.extends_baldrdash() {
328376
match r.get_class() {
@@ -523,8 +571,8 @@ impl ABIBody for AArch64ABIBody {
523571
}
524572
_ => {}
525573
};
526-
ret.push(store_stack(
527-
MemArg::FPOffset(off + self.frame_size()),
574+
ret.push(store_stack_fp(
575+
off + self.frame_size(),
528576
from_reg.to_reg(),
529577
ty,
530578
))
@@ -566,7 +614,7 @@ impl ABIBody for AArch64ABIBody {
566614
// Offset from beginning of stackslot area, which is at FP - stackslots_size.
567615
let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
568616
let fp_off: i64 = -(self.stackslots_size as i64) + stack_off + (offset as i64);
569-
store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
617+
store_stack_fp(fp_off, from_reg, ty)
570618
}
571619

572620
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Inst {
@@ -596,7 +644,7 @@ impl ABIBody for AArch64ABIBody {
596644
let islot = slot.get() as i64;
597645
let ty_size = self.get_spillslot_size(from_reg.get_class(), ty) * 8;
598646
let fp_off: i64 = -(self.stackslots_size as i64) - (8 * islot) - ty_size as i64;
599-
store_stack(MemArg::FPOffset(fp_off), from_reg, ty)
647+
store_stack_fp(fp_off, from_reg, ty)
600648
}
601649

602650
fn gen_prologue(&mut self) -> Vec<Inst> {
@@ -927,10 +975,19 @@ impl ABICall for AArch64ABICall {
927975
adjust_stack(self.sig.stack_arg_space as u64, /* is_sub = */ false)
928976
}
929977

930-
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Inst {
978+
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
979+
&self,
980+
ctx: &mut C,
981+
idx: usize,
982+
from_reg: Reg,
983+
) -> Vec<Inst> {
931984
match &self.sig.args[idx] {
932-
&ABIArg::Reg(reg, ty) => Inst::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty),
933-
&ABIArg::Stack(off, ty) => store_stack(MemArg::SPOffset(off), from_reg, ty),
985+
&ABIArg::Reg(reg, ty) => vec![Inst::gen_move(
986+
Writable::from_reg(reg.to_reg()),
987+
from_reg,
988+
ty,
989+
)],
990+
&ABIArg::Stack(off, ty) => store_stack_sp(ctx, off, from_reg, ty),
934991
}
935992
}
936993

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ pub fn mem_finalize(insn_off: CodeOffset, mem: &MemArg) -> (Vec<Inst>, MemArg) {
3535
let mem = MemArg::Unscaled(basereg, simm9);
3636
(vec![], mem)
3737
} else {
38+
// In an addition, x31 is the zero register, not sp; we have only one temporary
39+
// so we can't do the proper add here.
40+
debug_assert_ne!(
41+
basereg,
42+
stack_reg(),
43+
"should have diverted SP before mem_finalize"
44+
);
45+
3846
let tmp = writable_spilltmp_reg();
3947
let mut const_insts = Inst::load_constant(tmp, off as u64);
4048
let add_inst = Inst::AluRRR {
@@ -363,7 +371,11 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
363371
ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
364372
_ => 0b000000,
365373
};
366-
assert_ne!(writable_stack_reg(), rd);
374+
debug_assert_ne!(writable_stack_reg(), rd);
375+
// The stack pointer is the zero register in this context, so this might be an
376+
// indication that something is wrong.
377+
debug_assert_ne!(stack_reg(), rn);
378+
debug_assert_ne!(stack_reg(), rm);
367379
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
368380
}
369381
&Inst::AluRRRR {
@@ -818,11 +830,25 @@ impl<O: MachSectionOutput> MachInstEmit<O> for Inst {
818830
&Inst::Mov { rd, rm } => {
819831
assert!(rd.to_reg().get_class() == rm.get_class());
820832
assert!(rm.get_class() == RegClass::I64);
833+
821834
// MOV to SP is interpreted as MOV to XZR instead. And our codegen
822835
// should never MOV to XZR.
823-
assert!(machreg_to_gpr(rd.to_reg()) != 31);
824-
// Encoded as ORR rd, rm, zero.
825-
sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
836+
assert!(rd.to_reg() != stack_reg());
837+
838+
if rm == stack_reg() {
839+
// We can't use ORR here, so use an `add rd, sp, #0` instead.
840+
let imm12 = Imm12::maybe_from_u64(0).unwrap();
841+
sink.put4(enc_arith_rr_imm12(
842+
0b100_10001,
843+
imm12.shift_bits(),
844+
imm12.imm_bits(),
845+
rm,
846+
rd,
847+
));
848+
} else {
849+
// Encoded as ORR rd, rm, zero.
850+
sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
851+
}
826852
}
827853
&Inst::Mov32 { rd, rm } => {
828854
// MOV to SP is interpreted as MOV to XZR instead. And our codegen

cranelift/codegen/src/isa/aarch64/inst/mod.rs

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1719,6 +1719,9 @@ impl MachInst for Inst {
17191719

17201720
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
17211721
match self {
1722+
// TODO a regalloc assertion is triggered if we don't have this, see also #1586 on
1723+
// wasmtime, as well as https://github.com/bytecodealliance/regalloc.rs/issues/52.
1724+
&Inst::Mov { rm, .. } if rm == stack_reg() => None,
17221725
&Inst::Mov { rd, rm } => Some((rd, rm)),
17231726
&Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
17241727
_ => None,
@@ -2297,35 +2300,41 @@ impl ShowWithRRU for Inst {
22972300
}
22982301
&Inst::FpuLoad32 { rd, ref mem, .. } => {
22992302
let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size32);
2300-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 4);
2301-
format!("ldr {}, {}", rd, mem)
2303+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2304+
let mem = mem.show_rru(mb_rru);
2305+
format!("{}ldr {}, {}", mem_str, rd, mem)
23022306
}
23032307
&Inst::FpuLoad64 { rd, ref mem, .. } => {
23042308
let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size64);
2305-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
2306-
format!("ldr {}, {}", rd, mem)
2309+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2310+
let mem = mem.show_rru(mb_rru);
2311+
format!("{}ldr {}, {}", mem_str, rd, mem)
23072312
}
23082313
&Inst::FpuLoad128 { rd, ref mem, .. } => {
23092314
let rd = rd.to_reg().show_rru(mb_rru);
23102315
let rd = "q".to_string() + &rd[1..];
2311-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
2312-
format!("ldr {}, {}", rd, mem)
2316+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2317+
let mem = mem.show_rru(mb_rru);
2318+
format!("{}ldr {}, {}", mem_str, rd, mem)
23132319
}
23142320
&Inst::FpuStore32 { rd, ref mem, .. } => {
23152321
let rd = show_freg_sized(rd, mb_rru, InstSize::Size32);
2316-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 4);
2317-
format!("str {}, {}", rd, mem)
2322+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2323+
let mem = mem.show_rru(mb_rru);
2324+
format!("{}str {}, {}", mem_str, rd, mem)
23182325
}
23192326
&Inst::FpuStore64 { rd, ref mem, .. } => {
23202327
let rd = show_freg_sized(rd, mb_rru, InstSize::Size64);
2321-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
2322-
format!("str {}, {}", rd, mem)
2328+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2329+
let mem = mem.show_rru(mb_rru);
2330+
format!("{}str {}, {}", mem_str, rd, mem)
23232331
}
23242332
&Inst::FpuStore128 { rd, ref mem, .. } => {
23252333
let rd = rd.show_rru(mb_rru);
23262334
let rd = "q".to_string() + &rd[1..];
2327-
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
2328-
format!("str {}, {}", rd, mem)
2335+
let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru);
2336+
let mem = mem.show_rru(mb_rru);
2337+
format!("{}str {}, {}", mem_str, rd, mem)
23292338
}
23302339
&Inst::LoadFpuConst32 { rd, const_data } => {
23312340
let rd = show_freg_sized(rd.to_reg(), mb_rru, InstSize::Size32);

cranelift/codegen/src/isa/aarch64/lower.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,20 @@ fn lower_address<C: LowerCtx<I = Inst>>(
595595
// Add each addend to the address.
596596
for addend in addends {
597597
let reg = input_to_reg(ctx, *addend, NarrowValueMode::ZeroExtend64);
598+
599+
// In an addition, the stack register is the zero register, so divert it to another
600+
// register just before doing the actual add.
601+
let reg = if reg == stack_reg() {
602+
let tmp = ctx.tmp(RegClass::I64, I64);
603+
ctx.emit(Inst::Mov {
604+
rd: tmp,
605+
rm: stack_reg(),
606+
});
607+
tmp.to_reg()
608+
} else {
609+
reg
610+
};
611+
598612
ctx.emit(Inst::AluRRR {
599613
alu_op: ALUOp::Add64,
600614
rd: addr.clone(),
@@ -1970,7 +1984,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
19701984
assert!(inputs.len() == abi.num_args());
19711985
for (i, input) in inputs.iter().enumerate() {
19721986
let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
1973-
ctx.emit(abi.gen_copy_reg_to_arg(i, arg_reg));
1987+
for inst in abi.gen_copy_reg_to_arg(ctx, i, arg_reg) {
1988+
ctx.emit(inst);
1989+
}
19741990
}
19751991
for inst in abi.gen_call().into_iter() {
19761992
ctx.emit(inst);

cranelift/codegen/src/machinst/abi.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,13 @@ pub trait ABICall {
132132
/// Get the number of arguments expected.
133133
fn num_args(&self) -> usize;
134134

135-
/// Save the clobbered registers.
136135
/// Copy an argument value from a source register, prior to the call.
137-
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> Self::I;
136+
fn gen_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
137+
&self,
138+
ctx: &mut C,
139+
idx: usize,
140+
from_reg: Reg,
141+
) -> Vec<Self::I>;
138142

139143
/// Copy a return value into a destination register, after the call returns.
140144
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;

0 commit comments

Comments
 (0)