@@ -206,6 +206,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206
206
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f ;
207
207
}
208
208
209
+ static bool isREX2 (struct InternalInstruction *insn, uint8_t prefix) { // Reports whether `prefix` is the leading byte of a REX2 prefix; the caller stores it in rex2ExtensionPrefix[0] and reads the payload byte that follows.
210
+ return insn->mode == MODE_64BIT && prefix == 0xd5 ; // Recognised only when decoding in 64-bit mode, and only for the byte 0xd5.
211
+ }
212
+
209
213
// Consumes all of an instruction's prefix bytes, and marks the
210
214
// instruction as having them. Also sets the instruction's default operand,
211
215
// address, and other relevant data sizes to report operands correctly.
@@ -337,8 +341,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
337
341
return -1 ;
338
342
}
339
343
340
- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 ) &&
341
- ((~byte1 & 0x8 ) == 0x8 ) && ((byte2 & 0x4 ) == 0x4 )) {
344
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 )) {
342
345
insn->vectorExtensionType = TYPE_EVEX;
343
346
} else {
344
347
--insn->readerCursor ; // unconsume byte1
@@ -357,13 +360,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
357
360
return -1 ;
358
361
}
359
362
360
- // We simulate the REX prefix for simplicity's sake
361
363
if (insn->mode == MODE_64BIT) {
364
+ // We simulate the REX prefix for simplicity's sake
362
365
insn->rexPrefix = 0x40 |
363
366
(wFromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 3 ) |
364
367
(rFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 2 ) |
365
368
(xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 1 ) |
366
369
(bFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 0 );
370
+
371
+ // We simulate the REX2 prefix for simplicity's sake
372
+ insn->rex2ExtensionPrefix [1 ] =
373
+ (r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 6 ) |
374
+ (x2FromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 5 ) |
375
+ (b2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 );
367
376
}
368
377
369
378
LLVM_DEBUG (
@@ -474,6 +483,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
474
483
insn->vectorExtensionPrefix [1 ],
475
484
insn->vectorExtensionPrefix [2 ]));
476
485
}
486
+ } else if (isREX2 (insn, byte)) {
487
+ uint8_t byte1;
488
+ if (peek (insn, byte1)) {
489
+ LLVM_DEBUG (dbgs () << " Couldn't read second byte of REX2" );
490
+ return -1 ;
491
+ }
492
+ insn->rex2ExtensionPrefix [0 ] = byte;
493
+ consume (insn, insn->rex2ExtensionPrefix [1 ]);
494
+
495
+ // We simulate the REX prefix for simplicity's sake
496
+ insn->rexPrefix = 0x40 | (wFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 3 ) |
497
+ (rFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 2 ) |
498
+ (xFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 1 ) |
499
+ (bFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 0 );
500
+ LLVM_DEBUG (dbgs () << format (" Found REX2 prefix 0x%hhx 0x%hhx" ,
501
+ insn->rex2ExtensionPrefix [0 ],
502
+ insn->rex2ExtensionPrefix [1 ]));
477
503
} else if (isREX (insn, byte)) {
478
504
if (peek (insn, nextByte))
479
505
return -1 ;
@@ -532,7 +558,8 @@ static int readSIB(struct InternalInstruction *insn) {
532
558
if (consume (insn, insn->sib ))
533
559
return -1 ;
534
560
535
- index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 );
561
+ index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 ) |
562
+ (x2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
536
563
537
564
if (index == 0x4 ) {
538
565
insn->sibIndex = SIB_INDEX_NONE;
@@ -542,7 +569,8 @@ static int readSIB(struct InternalInstruction *insn) {
542
569
543
570
insn->sibScale = 1 << scaleFromSIB (insn->sib );
544
571
545
- base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 );
572
+ base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 ) |
573
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
546
574
547
575
switch (base) {
548
576
case 0x5 :
@@ -604,7 +632,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
604
632
605
633
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
606
634
static int readModRM (struct InternalInstruction *insn) {
607
- uint8_t mod, rm, reg, evexrm ;
635
+ uint8_t mod, rm, reg;
608
636
LLVM_DEBUG (dbgs () << " readModRM()" );
609
637
610
638
if (insn->consumedModRM )
@@ -636,14 +664,13 @@ static int readModRM(struct InternalInstruction *insn) {
636
664
break ;
637
665
}
638
666
639
- reg |= rFromREX (insn->rexPrefix ) << 3 ;
640
- rm |= bFromREX (insn->rexPrefix ) << 3 ;
667
+ reg |= (rFromREX (insn->rexPrefix ) << 3 ) |
668
+ (r2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
669
+ rm |= (bFromREX (insn->rexPrefix ) << 3 ) |
670
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
641
671
642
- evexrm = 0 ;
643
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
672
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
644
673
reg |= r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
645
- evexrm = xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
646
- }
647
674
648
675
insn->reg = (Reg)(insn->regBase + reg);
649
676
@@ -731,7 +758,7 @@ static int readModRM(struct InternalInstruction *insn) {
731
758
break ;
732
759
case 0x3 :
733
760
insn->eaDisplacement = EA_DISP_NONE;
734
- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm );
761
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
735
762
break ;
736
763
}
737
764
break ;
@@ -741,6 +768,8 @@ static int readModRM(struct InternalInstruction *insn) {
741
768
return 0 ;
742
769
}
743
770
771
+ #define MAX_GPR_NUM (0x1f ) // Highest register index (0x1f == 31) accepted by the TYPE_R8/R16/R32/R64 cases of GENERIC_FIXUP_FUNC; also passed as the index mask for fixupRegValue and fixupRMValue. NOTE(review): with mask == MAX_GPR_NUM, `index &= mask` means the following `index > MAX_GPR_NUM` checks cannot fire for those two instantiations -- confirm this is intended.
772
+
744
773
#define GENERIC_FIXUP_FUNC (name, base, prefix, mask ) \
745
774
static uint16_t name (struct InternalInstruction *insn, OperandType type, \
746
775
uint8_t index, uint8_t *valid) { \
@@ -754,7 +783,7 @@ static int readModRM(struct InternalInstruction *insn) {
754
783
return base + index ; \
755
784
case TYPE_R8: \
756
785
index &= mask; \
757
- if (index > 0xf ) \
786
+ if (index > MAX_GPR_NUM) \
758
787
*valid = 0 ; \
759
788
if (insn->rexPrefix && index >= 4 && index <= 7 ) { \
760
789
return prefix##_SPL + (index - 4 ); \
@@ -763,17 +792,17 @@ static int readModRM(struct InternalInstruction *insn) {
763
792
} \
764
793
case TYPE_R16: \
765
794
index &= mask; \
766
- if (index > 0xf ) \
795
+ if (index > MAX_GPR_NUM) \
767
796
*valid = 0 ; \
768
797
return prefix##_AX + index ; \
769
798
case TYPE_R32: \
770
799
index &= mask; \
771
- if (index > 0xf ) \
800
+ if (index > MAX_GPR_NUM) \
772
801
*valid = 0 ; \
773
802
return prefix##_EAX + index ; \
774
803
case TYPE_R64: \
775
804
index &= mask; \
776
- if (index > 0xf ) \
805
+ if (index > MAX_GPR_NUM) \
777
806
*valid = 0 ; \
778
807
return prefix##_RAX + index ; \
779
808
case TYPE_ZMM: \
@@ -824,8 +853,8 @@ static int readModRM(struct InternalInstruction *insn) {
824
853
// @param valid - The address of a uint8_t. The target is set to 1 if the
825
854
// field is valid for the register class; 0 if not.
826
855
// @return - The proper value.
827
- GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG, 0x1f )
828
- GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf )
856
+ GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG, MAX_GPR_NUM )
857
+ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, MAX_GPR_NUM )
829
858
830
859
// Consult an operand specifier to determine which of the fixup*Value functions
831
860
// to use in correcting readModRM()'s interpretation.
@@ -855,8 +884,31 @@ static int fixupReg(struct InternalInstruction *insn,
855
884
if (!valid)
856
885
return -1 ;
857
886
break ;
858
- case ENCODING_SIB:
859
887
CASE_ENCODING_RM:
888
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
889
+ modFromModRM (insn->modRM ) == 3 ) {
890
+ // EVEX_X can extend the register id to 32 for a non-GPR register that is
891
+ // encoded in RM.
892
+ // mode : MODE_64BIT
893
+ // Only 8 vector registers are available in 32 bit mode
894
+ // mod : 3
895
+ // RM encodes a register
896
+ switch (op->type ) {
897
+ case TYPE_Rv:
898
+ case TYPE_R8:
899
+ case TYPE_R16:
900
+ case TYPE_R32:
901
+ case TYPE_R64:
902
+ break ;
903
+ default :
904
+ insn->eaBase =
905
+ (EABase)(insn->eaBase +
906
+ (xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ));
907
+ break ;
908
+ }
909
+ }
910
+ [[fallthrough]];
911
+ case ENCODING_SIB:
860
912
if (insn->eaBase >= insn->eaRegBase ) {
861
913
insn->eaBase = (EABase)fixupRMValue (
862
914
insn, (OperandType)op->type , insn->eaBase - insn->eaRegBase , &valid);
@@ -945,6 +997,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
945
997
insn->opcodeType = XOPA_MAP;
946
998
return consume (insn, insn->opcode );
947
999
}
1000
+ } else if (mFromREX2 (insn->rex2ExtensionPrefix [1 ])) {
1001
+ // m bit indicates opcode map 1
1002
+ insn->opcodeType = TWOBYTE;
1003
+ return consume (insn, insn->opcode );
948
1004
}
949
1005
950
1006
if (consume (insn, current))
@@ -1388,10 +1444,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1388
1444
if (size == 0 )
1389
1445
size = insn->registerSize ;
1390
1446
1447
+ auto setOpcodeRegister = [&](unsigned base) {
1448
+ insn->opcodeRegister =
1449
+ (Reg)(base + ((bFromREX (insn->rexPrefix ) << 3 ) |
1450
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 ) |
1451
+ (insn->opcode & 7 )));
1452
+ };
1453
+
1391
1454
switch (size) {
1392
1455
case 1 :
1393
- insn->opcodeRegister = (Reg)(
1394
- MODRM_REG_AL + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1456
+ setOpcodeRegister (MODRM_REG_AL);
1395
1457
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1396
1458
insn->opcodeRegister < MODRM_REG_AL + 0x8 ) {
1397
1459
insn->opcodeRegister =
@@ -1400,18 +1462,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1400
1462
1401
1463
break ;
1402
1464
case 2 :
1403
- insn->opcodeRegister = (Reg)(
1404
- MODRM_REG_AX + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1465
+ setOpcodeRegister (MODRM_REG_AX);
1405
1466
break ;
1406
1467
case 4 :
1407
- insn->opcodeRegister =
1408
- (Reg)(MODRM_REG_EAX +
1409
- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1468
+ setOpcodeRegister (MODRM_REG_EAX);
1410
1469
break ;
1411
1470
case 8 :
1412
- insn->opcodeRegister =
1413
- (Reg)(MODRM_REG_RAX +
1414
- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1471
+ setOpcodeRegister (MODRM_REG_RAX);
1415
1472
break ;
1416
1473
}
1417
1474
0 commit comments