Skip to content

[X86] Support APX promoted RAO-INT and MOVBE instructions #77431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jan 26, 2024
68 changes: 38 additions & 30 deletions llvm/lib/Target/X86/X86InstrMisc.td
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
OpSize16, Requires<[Not64BitMode]>;
}

let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD_Or_NoMOVBE] in {
// This instruction is a consequence of BSWAP32r observing operand size. The
// encoding is valid, but the behavior is undefined.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
Expand Down Expand Up @@ -1090,35 +1090,43 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
// Load forms of MOVBE (opcode 0xF0, MRMSrcMem). Each record's selection
// pattern matches a bswap applied to a memory load and writes the result to
// a register of the same width. All three share the WriteALULd scheduling
// class.
let SchedRW = [WriteALULd] in {
// 16-bit: operand-size prefix selected via OpSize16.
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
OpSize16, T8;
// 32-bit.
def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
OpSize32, T8;
// 64-bit: declared with RI rather than I.
def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
T8;
}
// Store forms of MOVBE (opcode 0xF1, MRMDestMem). Each record's selection
// pattern matches a store of a byte-swapped register value. The records have
// no outputs; the memory operand is listed first among the inputs. All three
// share the WriteStore scheduling class.
let SchedRW = [WriteStore] in {
// 16-bit: operand-size prefix selected via OpSize16.
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)]>,
OpSize16, T8;
// 32-bit.
def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movbe{l}\t{$src, $dst|$dst, $src}",
[(store (bswap GR32:$src), addr:$dst)]>,
OpSize32, T8;
// 64-bit: declared with RI rather than I.
def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)]>,
T8;
}
// Movbe - Defines the memory forms of MOVBE for one operand type.
//   o      - opcode of the load form; the store form uses o + 1.
//   t      - X86TypeInfo supplying the register class, memory operand and
//            load node used below.
//   suffix - text appended to the generated record names (empty by default).
// Produces two records per instantiation:
//   rm<suffix> - load form: selects a bswap of a memory load into a register.
//   mr<suffix> - store form: selects a store of a byte-swapped register.
multiclass Movbe<bits<8> o, X86TypeInfo t, string suffix = ""> {
def rm#suffix : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
(ins t.MemOperand:$src1), "movbe", unaryop_ndd_args,
[(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>,
Sched<[WriteALULd]>;
// The store form has no outputs; its opcode is derived from the load opcode.
def mr#suffix : ITy<!add(o, 1), MRMDestMem, t, (outs),
(ins t.MemOperand:$dst, t.RegClass:$src1),
"movbe", unaryop_ndd_args,
[(store (bswap t.RegClass:$src1), addr:$dst)]>,
Sched<[WriteStore]>;
}

// Instantiations in the T8 opcode map with load opcode 0xF0 and no record
// name suffix. Gated on the HasMOVBE and NoEGPR predicates.
let Predicates = [HasMOVBE, NoEGPR] in {
defm MOVBE16 : Movbe<0xF0, Xi16>, OpSize16, T8;
defm MOVBE32 : Movbe<0xF0, Xi32>, OpSize32, T8;
defm MOVBE64 : Movbe<0xF0, Xi64>, T8;
}

// EVEX-encoded instantiations in T_MAP4 with load opcode 0x60; record names
// carry the "_EVEX" suffix. Gated on HasMOVBE, HasEGPR and In64BitMode.
// Only the 16-bit variant adds the PD prefix class.
let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
defm MOVBE16 : Movbe<0x60, Xi16, "_EVEX">, EVEX, T_MAP4, PD;
defm MOVBE32 : Movbe<0x60, Xi32, "_EVEX">, EVEX, T_MAP4;
defm MOVBE64 : Movbe<0x60, Xi64, "_EVEX">, EVEX, T_MAP4;
}

// Movberr - Defines the register-to-register forms of MOVBE for operand
// type t, both EVEX-encoded in T_MAP4.
//   rr     - opcode 0x61, MRMDestReg; carries the selection pattern that
//            matches a bswap of a register into a register.
//   rr_REV - opcode 0x60, MRMSrcReg; has an empty pattern list and is marked
//            DisassembleOnly.
multiclass Movberr<X86TypeInfo t> {
def rr : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
[(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
EVEX, T_MAP4;
def rr_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
EVEX, T_MAP4, DisassembleOnly;
}
// Register-to-register MOVBE instantiations. Scheduled as WriteALU and gated
// on HasMOVBE, HasNDD and In64BitMode. Only the 16-bit variant adds the PD
// prefix class.
let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
defm MOVBE16 : Movberr<Xi16>, PD;
defm MOVBE32 : Movberr<Xi32>;
defm MOVBE64 : Movberr<Xi64>;
}

//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrPredicates.td
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def HasTBM : Predicate<"Subtarget->hasTBM()">;
// Subtarget feature predicates. Each wraps a C++ expression evaluated against
// the Subtarget object.
def NoTBM : Predicate<"!Subtarget->hasTBM()">;
def HasLWP : Predicate<"Subtarget->hasLWP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
// True when at least one of NDD or MOVBE is unavailable, i.e. the negation
// of "has NDD and has MOVBE".
def NoNDD_Or_NoMOVBE : Predicate<"!Subtarget->hasNDD() || !Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
Expand Down
32 changes: 17 additions & 15 deletions llvm/lib/Target/X86/X86InstrRAOINT.td
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@ def X86rao_xor : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
// SelectionDAG node for X86ISD::AAND, typed by SDTRAOBinaryArith. Flagged as
// having a chain, possibly storing, and carrying a memory operand.
def X86rao_and : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

multiclass RAOINT_BASE<string OpcodeStr> {
// 32-bit memory-destination form (opcode 0xfc). The mnemonic is "a" followed
// by OpcodeStr, and the pattern uses the SDNode named "X86rao_" # OpcodeStr.
// Gated on HasRAOINT only.
let Predicates = [HasRAOINT] in
def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
!strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
Sched<[WriteALURMW]>;
// RaoInt - Defines the 32-bit and 64-bit memory-destination records for one
// RAO-INT operation.
//   m      - operation name; the mnemonic is "a" # m and the selection
//            pattern uses the SDNode named "X86rao_" # m.
//   suffix - text appended to the generated record names (empty by default).
// Both records use opcode 0xfc through BinOpMR_M and differ only in the type
// info (Xi32 / Xi64) and the register class in the pattern.
multiclass RaoInt<string m, string suffix = ""> {
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)] in
def 32mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi32>;
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)] in
def 64mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi64>;
}

// 64-bit memory-destination form (opcode 0xfc, REX_W). The mnemonic is "a"
// followed by OpcodeStr, and the pattern uses the SDNode named
// "X86rao_" # OpcodeStr. Gated on HasRAOINT and In64BitMode.
let Predicates = [HasRAOINT, In64BitMode] in
def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
!strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
Sched<[WriteALURMW]>, REX_W;
// RAO-INT instantiations in the T8 opcode map with no record name suffix.
// Gated on HasRAOINT and NoEGPR. The four operations are told apart by their
// prefix class: none (AADD), PD (AAND), XD (AOR), XS (AXOR).
let Predicates = [HasRAOINT, NoEGPR] in {
defm AADD : RaoInt<"add">, T8;
defm AAND : RaoInt<"and">, T8, PD;
defm AOR : RaoInt<"or" >, T8, XD;
defm AXOR : RaoInt<"xor">, T8, XS;
}

// One RAOINT_BASE instantiation per operation, all in the T8 opcode map and
// told apart by prefix class: none (AADD), PD (AAND), XD (AOR), XS (AXOR).
defm AADD : RAOINT_BASE<"add">, T8;
defm AAND : RAOINT_BASE<"and">, T8, PD;
defm AOR : RAOINT_BASE<"or" >, T8, XD;
defm AXOR : RAOINT_BASE<"xor">, T8, XS;
// EVEX-encoded RAO-INT instantiations in T_MAP4; record names carry the
// "_EVEX" suffix. Gated on HasRAOINT, HasEGPR and In64BitMode. The prefix
// classes are: none (AADD), PD (AAND), XD (AOR), XS (AXOR).
let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
defm AOR : RaoInt<"or", "_EVEX">, EVEX, T_MAP4, XD;
defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
}
214 changes: 189 additions & 25 deletions llvm/test/CodeGen/X86/movbe.ll
Original file line number Diff line number Diff line change
@@ -1,66 +1,230 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM

; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd,+movbe --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=NOMOVBE
declare i16 @llvm.bswap.i16(i16) nounwind readnone
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone

; Store of a byte-swapped i16. The runs with MOVBE available expect a single
; movbew store; the run without +movbe expects an NDD rolw into %ax followed
; by a plain movw store.
define void @test1(ptr nocapture %x, i16 %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movbew %si, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test1:
; SLM: # %bb.0:
; SLM-NEXT: movbew %si, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test1:
; EGPR: # %bb.0:
; EGPR-NEXT: movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test1:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: rolw $8, %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc6,0x08]
; NOMOVBE-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i16 @llvm.bswap.i16(i16 %y)
store i16 %bswap, ptr %x, align 2
ret void
}

; Byte-swap of a loaded i16. The runs with MOVBE available expect a single
; movbew load; the run without +movbe expects an NDD rolw with a memory
; source.
define i16 @test2(ptr %x) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movbew (%rdi), %ax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test2:
; SLM: # %bb.0:
; SLM-NEXT: movbew (%rdi), %ax
; SLM-NEXT: retq
;
; EGPR-LABEL: test2:
; EGPR: # %bb.0:
; EGPR-NEXT: movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test2:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i16, ptr %x, align 2
%bswap = call i16 @llvm.bswap.i16(i16 %load)
ret i16 %bswap
}

; Store of a byte-swapped i32. The runs with MOVBE available expect a single
; movbel store; the run without +movbe expects bswapl followed by a plain
; movl store.
define void @test3(ptr nocapture %x, i32 %y) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movbel %esi, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test3:
; SLM: # %bb.0:
; SLM-NEXT: movbel %esi, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test3:
; EGPR: # %bb.0:
; EGPR-NEXT: movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test3:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: bswapl %esi # encoding: [0x0f,0xce]
; NOMOVBE-NEXT: movl %esi, (%rdi) # encoding: [0x89,0x37]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i32 @llvm.bswap.i32(i32 %y)
store i32 %bswap, ptr %x, align 4
ret void
}

; Byte-swap of a loaded i32. The runs with MOVBE available expect a single
; movbel load; the run without +movbe expects a plain movl load followed by
; bswapl.
define i32 @test4(ptr %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movbel (%rdi), %eax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test4:
; SLM: # %bb.0:
; SLM-NEXT: movbel (%rdi), %eax
; SLM-NEXT: retq
;
; EGPR-LABEL: test4:
; EGPR: # %bb.0:
; EGPR-NEXT: movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test4:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; NOMOVBE-NEXT: bswapl %eax # encoding: [0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i32, ptr %x, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %load)
ret i32 %bswap
}

; Store of a byte-swapped i64. The runs with MOVBE available expect a single
; movbeq store; the run without +movbe expects bswapq followed by a plain
; movq store.
define void @test5(ptr %x, i64 %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: movbeq %rsi, (%rdi)
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test5:
; SLM: # %bb.0:
; SLM-NEXT: movbeq %rsi, (%rdi)
; SLM-NEXT: retq
;
; EGPR-LABEL: test5:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test5:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: bswapq %rsi # encoding: [0x48,0x0f,0xce]
; NOMOVBE-NEXT: movq %rsi, (%rdi) # encoding: [0x48,0x89,0x37]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i64 @llvm.bswap.i64(i64 %y)
store i64 %bswap, ptr %x, align 8
ret void
}

; Byte-swap of a loaded i64. The runs with MOVBE available expect a single
; movbeq load; the run without +movbe expects a plain movq load followed by
; bswapq.
define i64 @test6(ptr %x) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: movbeq (%rdi), %rax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test6:
; SLM: # %bb.0:
; SLM-NEXT: movbeq (%rdi), %rax
; SLM-NEXT: retq
;
; EGPR-LABEL: test6:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test6:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%load = load i64, ptr %x, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %load)
ret i64 %bswap
}

; Register-to-register byte-swap of an i64 with no memory access. The atom
; and slm runs and the run without +movbe expect movq followed by bswapq;
; the +egpr,+ndd,+movbe run expects a single two-operand movbeq.
define i64 @test7(i64 %x) nounwind {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: retq
;
; SLM-LABEL: test7:
; SLM: # %bb.0:
; SLM-NEXT: movq %rdi, %rax
; SLM-NEXT: bswapq %rax
; SLM-NEXT: retq
;
; EGPR-LABEL: test7:
; EGPR: # %bb.0:
; EGPR-NEXT: movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
; EGPR-NEXT: retq # encoding: [0xc3]
;
; NOMOVBE-LABEL: test7:
; NOMOVBE: # %bb.0:
; NOMOVBE-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
; NOMOVBE-NEXT: retq # encoding: [0xc3]
%bswap = call i64 @llvm.bswap.i64(i64 %x)
ret i64 %bswap
}
Loading