Skip to content

Commit a144c17

Browse files
author
Peter Goodman
authored
Missing arch fused x86 call ret (#552)
* Adds missing arch identification from triples. Adds sub-arch identifications. Adds x86 decoding logic for fusing calls that target the next instruction, and when the next instruction is a return * Bug fix on call-ret fusing * Bug fix on call-ret fusing * Address PR feedback * Address PR feedback * Add 64-bit fusing support. Minor bug fix * Add 64-bit fusing support. Minor bug fix
1 parent a787375 commit a144c17

File tree

2 files changed

+78
-17
lines changed

2 files changed

+78
-17
lines changed

include/remill/Arch/Arch.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,16 @@ class Arch {
198198
}
199199

200200
// Decode an instruction.
201+
//
202+
// NOTE(pag): If you give `DecodeInstruction` a bunch of bytes, then it will
203+
// opportunistically try to recognize some
204+
// simple idioms and fuse them (e.g. `call; pop` on x86,
205+
// `sethi; or` on sparc). If you don't want to decode idioms, then
206+
// one usage pattern to avoid them is to start with
207+
// `MinInstructionSize()` bytes, and if that fails to decode, then
208+
// walk up, one byte at a time, to `MaxInstructionSize(false)`
209+
// bytes being passed to the decoder, until you successfully decode
210+
// or ultimately fail.
201211
virtual bool DecodeInstruction(uint64_t address, std::string_view instr_bytes,
202212
Instruction &inst) const = 0;
203213

lib/Arch/X86/Arch.cpp

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -967,7 +967,7 @@ static bool IsAVX(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
967967
case XED_ISA_SET_AVX_VNNI:
968968
return true;
969969
default:
970-
return false;
970+
break;
971971
}
972972
switch (category) {
973973
case XED_CATEGORY_AVX:
@@ -1057,7 +1057,11 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
10571057
}
10581058
}
10591059

1060-
static const char *FusablePopReg(char byte) {
1060+
// Decode the destination register of a `pop <reg>`, where `byte` is the only
1061+
// byte of a 1-byte opcode. On 64-bit, the same decoded byte maps to a 64-bit
1062+
// register. We apply a fixup below in `FillFusedCallPopRegOperands` to account
1063+
// for upgrading the register.
1064+
static const char *FusablePopReg32(char byte) {
10611065
switch (static_cast<uint8_t>(byte)) {
10621066
case 0x58: return "EAX";
10631067
case 0x59: return "ECX";
@@ -1073,15 +1077,40 @@ static const char *FusablePopReg(char byte) {
10731077
}
10741078
}
10751079

1080+
// Decode the destination register of a `pop r8` through `pop r15`, assuming
1081+
// that we've already decoded the `0x41` prefix, and `byte` is the second byte
1082+
// of the two-byte opcode. Returns `nullptr` for any other byte value.
1083+
static const char *FusablePopReg64(char byte) {
1084+
switch (static_cast<uint8_t>(byte)) {
1085+
case 0x58: return "R8";
1086+
case 0x59: return "R9";
1087+
case 0x5a: return "R10";
1088+
case 0x5b: return "R11";
1089+
case 0x5c: return "R12";
1090+
case 0x5d: return "R13";
1091+
case 0x5e: return "R14";
1092+
case 0x5f: return "R15";
1093+
default: return nullptr;
1094+
}
1095+
}
1096+
1097+
// Fill in the operands for a fused `call+pop` pair. This ends up acting like
1098+
// a `mov` variant, and the semantic is located in `DATAXFER`. Fusing of this
1099+
// pair is beneficial because it prevents downstream users from treating the initial call
1100+
// as semantically being a function call, when really this is more of a move
1101+
// instruction. Downstream users like McSema and Anvill benefit from seeing this
1102+
// as a MOV-variant because of how they identify cross-references related to
1103+
// uses of the program counter (`PC`) register.
10761104
static void FillFusedCallPopRegOperands(Instruction &inst,
10771105
unsigned address_size,
1078-
const char *dest_reg_name32) {
1106+
const char *dest_reg_name,
1107+
unsigned call_inst_len) {
10791108
inst.operands.resize(2);
10801109
auto &dest = inst.operands[0];
10811110
auto &src = inst.operands[1];
10821111

10831112
dest.type = Operand::kTypeRegister;
1084-
dest.reg.name = dest_reg_name32;
1113+
dest.reg.name = dest_reg_name;
10851114
dest.reg.size = address_size;
10861115
dest.size = address_size;
10871116
dest.action = Operand::kActionWrite;
@@ -1092,14 +1121,26 @@ static void FillFusedCallPopRegOperands(Instruction &inst,
10921121
src.addr.address_size = address_size;
10931122
src.addr.base_reg.name = "PC";
10941123
src.addr.base_reg.size = address_size;
1095-
src.addr.displacement = (inst.next_pc - inst.pc) - 1u;
1124+
src.addr.displacement = static_cast<int64_t>(call_inst_len);
10961125
src.addr.kind = Operand::Address::kAddressCalculation;
10971126

10981127
if (32 == address_size) {
10991128
inst.function = "CALL_POP_FUSED_32";
11001129

11011130
} else {
11021131
inst.function = "CALL_POP_FUSED_64";
1132+
1133+
// Rename the register to be a 64-bit register. `pop eax` when decoded as
1134+
// a 32-bit instruction, and `pop rax` when decoded as a 64-bit instruction,
1135+
// both have the same binary representation. So for these cases, we store
1136+
// a 32-bit register name, such as `EAX` in `dest_reg_name`. If we're doing
1137+
// a fuse on 64-bit, then we want to upgrade the destination register to
1138+
// its `R`-prefixed variant, lest we accidentally discard the high 32 bits.
1139+
//
1140+
// For the case of `pop r8` et al. on 64 bit, `dest_reg_name` contains the
1141+
// 64-bit register name, and so the injection of `R` acts as a no-op.
1142+
//
1143+
// NOTE(pag): See `FusablePopReg32` and `FusablePopReg64`.
11031144
dest.reg.name[0] = 'R';
11041145
}
11051146
}
@@ -1117,13 +1158,13 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
11171158

11181159
xed_decoded_inst_t xedd_;
11191160
xed_decoded_inst_t *xedd = &xedd_;
1120-
auto mode = 32 == address_size ? &kXEDState32 : &kXEDState64;
1121-
1161+
const auto mode = 32 == address_size ? &kXEDState32 : &kXEDState64;
11221162
if (!DecodeXED(xedd, mode, inst_bytes, address)) {
11231163
return false;
11241164
}
11251165

11261166
auto len = xed_decoded_inst_get_length(xedd);
1167+
auto extra_len = 0u; // From fusing.
11271168
const auto iform = xed_decoded_inst_get_iform_enum(xedd);
11281169
const auto xedi = xed_decoded_inst_inst(xedd);
11291170
const auto num_operands = xed_decoded_inst_noperands(xedd);
@@ -1158,33 +1199,42 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
11581199
return false;
11591200
}
11601201

1161-
inst.category = CreateCategory(xedd);
1162-
inst.next_pc = address + len;
1163-
11641202
// Look for instruction fusing opportunities. For now, just `call; pop`.
11651203
const char *is_fused_call_pop = nullptr;
11661204
if (len < inst_bytes.size() &&
11671205
(iform == XED_IFORM_CALL_NEAR_RELBRd ||
11681206
iform == XED_IFORM_CALL_NEAR_RELBRz) &&
11691207
!xed_decoded_inst_get_branch_displacement(xedd)) {
1170-
is_fused_call_pop = FusablePopReg(inst_bytes[len]);
1208+
is_fused_call_pop = FusablePopReg32(inst_bytes[len]);
11711209

11721210
// Record the extra fused length (to influence `next_pc` calculation) and set
11731211
// the instruction category, so that users no longer interpret this
11741212
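// instruction as semantically being a call.
//
// NOTE(review): `inst.category` is assigned again from `CreateCategory(xedd)`
// after this block, which looks like it overwrites `kCategoryNormal` — confirm.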
11751213
if (is_fused_call_pop) {
1176-
len += 1u;
1177-
inst.next_pc += 1u;
1214+
extra_len = 1u;
11781215
inst.category = Instruction::kCategoryNormal;
1216+
1217+
// Look for `pop r8` et al.
1218+
} else if (64 == address_size &&
1219+
(2 + len) <= inst_bytes.size() &&
1220+
inst_bytes[len] == 0x41) {
1221+
is_fused_call_pop = FusablePopReg64(inst_bytes[len + 1]);
1222+
if (is_fused_call_pop) {
1223+
extra_len = 2u;
1224+
inst.category = Instruction::kCategoryNormal;
1225+
}
11791226
}
11801227
}
11811228

1229+
inst.category = CreateCategory(xedd);
1230+
inst.next_pc = address + len + extra_len;
1231+
11821232
// Fiddle with the size of the bytes.
11831233
if (!inst.bytes.empty() && inst.bytes.data() == inst_bytes.data()) {
1184-
CHECK_LE(len, inst.bytes.size());
1185-
inst.bytes.resize(len);
1234+
CHECK_LE(len + extra_len, inst.bytes.size());
1235+
inst.bytes.resize(len + extra_len);
11861236
} else {
1187-
inst.bytes = inst_bytes.substr(0, len);
1237+
inst.bytes = inst_bytes.substr(0, len + extra_len);
11881238
}
11891239

11901240
// Wrap an instruction in atomic begin/end if it accesses memory with RMW
@@ -1207,7 +1257,8 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
12071257
}
12081258

12091259
if (is_fused_call_pop) {
1210-
FillFusedCallPopRegOperands(inst, address_size, is_fused_call_pop);
1260+
FillFusedCallPopRegOperands(inst, address_size, is_fused_call_pop,
1261+
len);
12111262

12121263
} else {
12131264
inst.function = InstructionFunctionName(xedd);

0 commit comments

Comments
 (0)