@@ -967,7 +967,7 @@ static bool IsAVX(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
967967 case XED_ISA_SET_AVX_VNNI:
968968 return true ;
969969 default :
970- return false ;
970+ break ;
971971 }
972972 switch (category) {
973973 case XED_CATEGORY_AVX:
@@ -1057,7 +1057,11 @@ static bool IsAVX512(xed_isa_set_enum_t isa_set, xed_category_enum_t category) {
10571057 }
10581058}
10591059
1060- static const char *FusablePopReg (char byte) {
1060+ // Decode the destination register of a `pop <reg>`, where `byte` is the only
1061+ // byte of a 1-byte opcode. On 64-bit, the same decoded byte maps to a 64-bit
1062+ // register. We apply a fixup below in `FillFusedCallPopRegOperands` to account
1063+ // for upgrading the register.
1064+ static const char *FusablePopReg32 (char byte) {
10611065 switch (static_cast <uint8_t >(byte)) {
10621066 case 0x58 : return " EAX" ;
10631067 case 0x59 : return " ECX" ;
@@ -1073,15 +1077,40 @@ static const char *FusablePopReg(char byte) {
10731077 }
10741078}
10751079
1080+ // Decode the destination register of a `pop r8` through `pop r15`, assuming
1081+ // that we've already decoded the `0x41` prefix, and `byte` is the second byte
1082+ // of the two-byte opcode. Returns `nullptr` if `byte` is not in `0x58`-`0x5f`.
1083+ static const char *FusablePopReg64 (char byte) {
1084+ switch (static_cast <uint8_t >(byte)) {
1085+ case 0x58 : return " R8" ;
1086+ case 0x59 : return " R9" ;
1087+ case 0x5a : return " R10" ;
1088+ case 0x5b : return " R11" ;
1089+ case 0x5c : return " R12" ;
1090+ case 0x5d : return " R13" ;
1091+ case 0x5e : return " R14" ;
1092+ case 0x5f : return " R15" ;
1093+ default : return nullptr ;
1094+ }
1095+ }
1096+
1097+ // Fill in the operands for a fused `call+pop` pair. This ends up acting like
1098+ // a `mov` variant, and the semantic is located in `DATAXFER`. Fusing of this
1099+ // pair is beneficial to prevent downstream users from treating the initial call
1100+ // as semantically being a function call, when really this is more of a move
1101+ // instruction. Downstream users like McSema and Anvill benefit from seeing this
1102+ // as a MOV-variant because of how they identify cross-references related to
1103+ // uses of the program counter (`PC`) register.
10761104static void FillFusedCallPopRegOperands (Instruction &inst,
10771105 unsigned address_size,
1078- const char *dest_reg_name32) {
1106+ const char *dest_reg_name,
1107+ unsigned call_inst_len) {
10791108 inst.operands .resize (2 );
10801109 auto &dest = inst.operands [0 ];
10811110 auto &src = inst.operands [1 ];
10821111
10831112 dest.type = Operand::kTypeRegister ;
1084- dest.reg .name = dest_reg_name32 ;
1113+ dest.reg .name = dest_reg_name ;
10851114 dest.reg .size = address_size;
10861115 dest.size = address_size;
10871116 dest.action = Operand::kActionWrite ;
@@ -1092,14 +1121,26 @@ static void FillFusedCallPopRegOperands(Instruction &inst,
10921121 src.addr .address_size = address_size;
10931122 src.addr .base_reg .name = " PC" ;
10941123 src.addr .base_reg .size = address_size;
1095- src.addr .displacement = (inst. next_pc - inst. pc ) - 1u ;
1124+ src.addr .displacement = static_cast < int64_t >(call_inst_len) ;
10961125 src.addr .kind = Operand::Address::kAddressCalculation ;
10971126
10981127 if (32 == address_size) {
10991128 inst.function = " CALL_POP_FUSED_32" ;
11001129
11011130 } else {
11021131 inst.function = " CALL_POP_FUSED_64" ;
1132+
1133+ // Rename the register to be a 64-bit register. `pop eax` when decoded as
1134+ // a 32-bit instruction, and `pop rax` when decoded as a 64-bit instruction,
1135+ // both have the same binary representation. So for these cases, we store
1136+ // a 32-bit register name, such as `EAX` in `dest_reg_name`. If we're doing
1137+ // a fuse on 64-bit, then we want to upgrade the destination register to
1138+ // its `R`-prefixed variant, lest we accidentally discard the high 32 bits.
1139+ //
1140+ // For the case of `pop r8` et al. on 64 bit, `dest_reg_name` contains the
1141+ // 64-bit register name, and so the injection of `R` acts as a no-op.
1142+ //
1143+ // NOTE(pag): See `FusablePopReg32` and `FusablePopReg64`.
11031144 dest.reg .name [0 ] = ' R' ;
11041145 }
11051146}
@@ -1117,13 +1158,13 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
11171158
11181159 xed_decoded_inst_t xedd_;
11191160 xed_decoded_inst_t *xedd = &xedd_;
1120- auto mode = 32 == address_size ? &kXEDState32 : &kXEDState64 ;
1121-
1161+ const auto mode = 32 == address_size ? &kXEDState32 : &kXEDState64 ;
11221162 if (!DecodeXED (xedd, mode, inst_bytes, address)) {
11231163 return false ;
11241164 }
11251165
11261166 auto len = xed_decoded_inst_get_length (xedd);
1167+ auto extra_len = 0u ; // From fusing.
11271168 const auto iform = xed_decoded_inst_get_iform_enum (xedd);
11281169 const auto xedi = xed_decoded_inst_inst (xedd);
11291170 const auto num_operands = xed_decoded_inst_noperands (xedd);
@@ -1158,33 +1199,42 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
11581199 return false ;
11591200 }
11601201
1161- inst.category = CreateCategory (xedd);
1162- inst.next_pc = address + len;
1163-
11641202 // Look for instruction fusing opportunities. For now, just `call; pop`.
11651203 const char *is_fused_call_pop = nullptr ;
11661204 if (len < inst_bytes.size () &&
11671205 (iform == XED_IFORM_CALL_NEAR_RELBRd ||
11681206 iform == XED_IFORM_CALL_NEAR_RELBRz) &&
11691207 !xed_decoded_inst_get_branch_displacement (xedd)) {
1170- is_fused_call_pop = FusablePopReg (inst_bytes[len]);
1208+ is_fused_call_pop = FusablePopReg32 (inst_bytes[len]);
11711209
11721210 // Change the instruction length (to influence `next_pc` calculation) and
11731211 // the instruction category, so that users no longer interpret this
11741212 // instruction as semantically being a call.
11751213 if (is_fused_call_pop) {
1176- len += 1u ;
1177- inst.next_pc += 1u ;
1214+ extra_len = 1u ;
11781215 inst.category = Instruction::kCategoryNormal ;
1216+
1217+ // Look for `pop r8` et al.
1218+ } else if (64 == address_size &&
1219+ (2 + len) <= inst_bytes.size () &&
1220+ inst_bytes[len] == 0x41 ) {
1221+ is_fused_call_pop = FusablePopReg64 (inst_bytes[len + 1 ]);
1222+ if (is_fused_call_pop) {
1223+ extra_len = 2u ;
1224+ inst.category = Instruction::kCategoryNormal ;
1225+ }
11791226 }
11801227 }
11811228
1229+ inst.category = CreateCategory (xedd);
1230+ inst.next_pc = address + len + extra_len;
1231+
11821232 // Fiddle with the size of the bytes.
11831233 if (!inst.bytes .empty () && inst.bytes .data () == inst_bytes.data ()) {
1184- CHECK_LE (len, inst.bytes .size ());
1185- inst.bytes .resize (len);
1234+ CHECK_LE (len + extra_len , inst.bytes .size ());
1235+ inst.bytes .resize (len + extra_len );
11861236 } else {
1187- inst.bytes = inst_bytes.substr (0 , len);
1237+ inst.bytes = inst_bytes.substr (0 , len + extra_len );
11881238 }
11891239
11901240 // Wrap an instruction in atomic begin/end if it accesses memory with RMW
@@ -1207,7 +1257,8 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes,
12071257 }
12081258
12091259 if (is_fused_call_pop) {
1210- FillFusedCallPopRegOperands (inst, address_size, is_fused_call_pop);
1260+ FillFusedCallPopRegOperands (inst, address_size, is_fused_call_pop,
1261+ len);
12111262
12121263 } else {
12131264 inst.function = InstructionFunctionName (xedd);
0 commit comments