Skip to content

Commit c688fb0

Browse files
committed
[X86][MC] Support decoding of EGPR for APX
1 parent 0723d24 commit c688fb0

File tree

7 files changed

+870
-42
lines changed

7 files changed

+870
-42
lines changed

llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp

Lines changed: 87 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206206
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
207207
}
208208

209+
// Returns true when the instruction is being decoded in 64-bit mode and
// `prefix` is 0xd5, the byte this decoder treats as the start of a REX2
// prefix. In any other mode the byte is not treated as REX2.
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
210+
return insn->mode == MODE_64BIT && prefix == 0xd5;
211+
}
212+
209213
// Consumes all of an instruction's prefix bytes, and marks the
210214
// instruction as having them. Also sets the instruction's default operand,
211215
// address, and other relevant data sizes to report operands correctly.
@@ -337,8 +341,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
337341
return -1;
338342
}
339343

340-
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
341-
((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
344+
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
342345
insn->vectorExtensionType = TYPE_EVEX;
343346
} else {
344347
--insn->readerCursor; // unconsume byte1
@@ -357,13 +360,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
357360
return -1;
358361
}
359362

360-
// We simulate the REX prefix for simplicity's sake
361363
if (insn->mode == MODE_64BIT) {
364+
// We simulate the REX prefix for simplicity's sake
362365
insn->rexPrefix = 0x40 |
363366
(wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
364367
(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
365368
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
366369
(bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
370+
371+
// We simulate the REX2 prefix for simplicity's sake
372+
insn->rex2ExtensionPrefix[1] =
373+
(r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
374+
(x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
375+
(b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
367376
}
368377

369378
LLVM_DEBUG(
@@ -474,6 +483,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
474483
insn->vectorExtensionPrefix[1],
475484
insn->vectorExtensionPrefix[2]));
476485
}
486+
} else if (isREX2(insn, byte)) {
487+
uint8_t byte1;
488+
if (peek(insn, byte1)) {
489+
LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
490+
return -1;
491+
}
492+
insn->rex2ExtensionPrefix[0] = byte;
493+
consume(insn, insn->rex2ExtensionPrefix[1]);
494+
495+
// We simulate the REX prefix for simplicity's sake
496+
insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
497+
(rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
498+
(xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
499+
(bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
500+
LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
501+
insn->rex2ExtensionPrefix[0],
502+
insn->rex2ExtensionPrefix[1]));
477503
} else if (isREX(insn, byte)) {
478504
if (peek(insn, nextByte))
479505
return -1;
@@ -532,7 +558,8 @@ static int readSIB(struct InternalInstruction *insn) {
532558
if (consume(insn, insn->sib))
533559
return -1;
534560

535-
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
561+
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
562+
(x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
536563

537564
if (index == 0x4) {
538565
insn->sibIndex = SIB_INDEX_NONE;
@@ -542,7 +569,8 @@ static int readSIB(struct InternalInstruction *insn) {
542569

543570
insn->sibScale = 1 << scaleFromSIB(insn->sib);
544571

545-
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
572+
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
573+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
546574

547575
switch (base) {
548576
case 0x5:
@@ -604,7 +632,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
604632

605633
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
606634
static int readModRM(struct InternalInstruction *insn) {
607-
uint8_t mod, rm, reg, evexrm;
635+
uint8_t mod, rm, reg;
608636
LLVM_DEBUG(dbgs() << "readModRM()");
609637

610638
if (insn->consumedModRM)
@@ -636,14 +664,13 @@ static int readModRM(struct InternalInstruction *insn) {
636664
break;
637665
}
638666

639-
reg |= rFromREX(insn->rexPrefix) << 3;
640-
rm |= bFromREX(insn->rexPrefix) << 3;
667+
reg |= (rFromREX(insn->rexPrefix) << 3) |
668+
(r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
669+
rm |= (bFromREX(insn->rexPrefix) << 3) |
670+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
641671

642-
evexrm = 0;
643-
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
672+
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
644673
reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
645-
evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
646-
}
647674

648675
insn->reg = (Reg)(insn->regBase + reg);
649676

@@ -731,7 +758,7 @@ static int readModRM(struct InternalInstruction *insn) {
731758
break;
732759
case 0x3:
733760
insn->eaDisplacement = EA_DISP_NONE;
734-
insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
761+
insn->eaBase = (EABase)(insn->eaRegBase + rm);
735762
break;
736763
}
737764
break;
@@ -741,6 +768,8 @@ static int readModRM(struct InternalInstruction *insn) {
741768
return 0;
742769
}
743770

771+
// Largest register index accepted for TYPE_R8/R16/R32/R64 operands by the
// fixup functions generated below; it is also passed to them as the index
// mask.
// NOTE(review): where the mask equals MAX_GPR_NUM, the `index > MAX_GPR_NUM`
// checks that follow `index &= mask` cannot fire — confirm this is intended.
#define MAX_GPR_NUM (0x1f)
772+
744773
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
745774
static uint16_t name(struct InternalInstruction *insn, OperandType type, \
746775
uint8_t index, uint8_t *valid) { \
@@ -754,7 +783,7 @@ static int readModRM(struct InternalInstruction *insn) {
754783
return base + index; \
755784
case TYPE_R8: \
756785
index &= mask; \
757-
if (index > 0xf) \
786+
if (index > MAX_GPR_NUM) \
758787
*valid = 0; \
759788
if (insn->rexPrefix && index >= 4 && index <= 7) { \
760789
return prefix##_SPL + (index - 4); \
@@ -763,17 +792,17 @@ static int readModRM(struct InternalInstruction *insn) {
763792
} \
764793
case TYPE_R16: \
765794
index &= mask; \
766-
if (index > 0xf) \
795+
if (index > MAX_GPR_NUM) \
767796
*valid = 0; \
768797
return prefix##_AX + index; \
769798
case TYPE_R32: \
770799
index &= mask; \
771-
if (index > 0xf) \
800+
if (index > MAX_GPR_NUM) \
772801
*valid = 0; \
773802
return prefix##_EAX + index; \
774803
case TYPE_R64: \
775804
index &= mask; \
776-
if (index > 0xf) \
805+
if (index > MAX_GPR_NUM) \
777806
*valid = 0; \
778807
return prefix##_RAX + index; \
779808
case TYPE_ZMM: \
@@ -824,8 +853,8 @@ static int readModRM(struct InternalInstruction *insn) {
824853
// @param valid - The address of a uint8_t. The target is set to 1 if the
825854
// field is valid for the register class; 0 if not.
826855
// @return - The proper value.
827-
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
828-
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
856+
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, MAX_GPR_NUM)
857+
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, MAX_GPR_NUM)
829858

830859
// Consult an operand specifier to determine which of the fixup*Value functions
831860
// to use in correcting readModRM()'s interpretation.
@@ -855,8 +884,31 @@ static int fixupReg(struct InternalInstruction *insn,
855884
if (!valid)
856885
return -1;
857886
break;
858-
case ENCODING_SIB:
859887
CASE_ENCODING_RM:
888+
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
889+
modFromModRM(insn->modRM) == 3) {
890+
// EVEX_X can extend the register id to 32 for a non-GPR register that is
891+
// encoded in RM.
892+
// mode : MODE_64BIT
893+
// Only 8 vector registers are available in 32 bit mode
894+
// mod : 3
895+
// RM encodes a register
896+
switch (op->type) {
897+
case TYPE_Rv:
898+
case TYPE_R8:
899+
case TYPE_R16:
900+
case TYPE_R32:
901+
case TYPE_R64:
902+
break;
903+
default:
904+
insn->eaBase =
905+
(EABase)(insn->eaBase +
906+
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
907+
break;
908+
}
909+
}
910+
[[fallthrough]];
911+
case ENCODING_SIB:
860912
if (insn->eaBase >= insn->eaRegBase) {
861913
insn->eaBase = (EABase)fixupRMValue(
862914
insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
@@ -945,6 +997,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
945997
insn->opcodeType = XOPA_MAP;
946998
return consume(insn, insn->opcode);
947999
}
1000+
} else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
1001+
// m bit indicates opcode map 1
1002+
insn->opcodeType = TWOBYTE;
1003+
return consume(insn, insn->opcode);
9481004
}
9491005

9501006
if (consume(insn, current))
@@ -1388,10 +1444,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
13881444
if (size == 0)
13891445
size = insn->registerSize;
13901446

1447+
auto setOpcodeRegister = [&](unsigned base) {
1448+
insn->opcodeRegister =
1449+
(Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
1450+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
1451+
(insn->opcode & 7)));
1452+
};
1453+
13911454
switch (size) {
13921455
case 1:
1393-
insn->opcodeRegister = (Reg)(
1394-
MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1456+
setOpcodeRegister(MODRM_REG_AL);
13951457
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
13961458
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
13971459
insn->opcodeRegister =
@@ -1400,18 +1462,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
14001462

14011463
break;
14021464
case 2:
1403-
insn->opcodeRegister = (Reg)(
1404-
MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1465+
setOpcodeRegister(MODRM_REG_AX);
14051466
break;
14061467
case 4:
1407-
insn->opcodeRegister =
1408-
(Reg)(MODRM_REG_EAX +
1409-
((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1468+
setOpcodeRegister(MODRM_REG_EAX);
14101469
break;
14111470
case 8:
1412-
insn->opcodeRegister =
1413-
(Reg)(MODRM_REG_RAX +
1414-
((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1471+
setOpcodeRegister(MODRM_REG_RAX);
14151472
break;
14161473
}
14171474

0 commit comments

Comments
 (0)