Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit a01e768

Browse files
author
Uriel Korach
committed
[X86] [PATCH] [intrinsics] Lowering X86 ABS intrinsics to IR. (llvm)
This patch, together with a matching clang patch (https://reviews.llvm.org/D37694), implements the lowering of X86 ABS intrinsics to IR. Differential Revision: https://reviews.llvm.org/D37693. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313134 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent bddbeac commit a01e768

16 files changed

+321
-398
lines changed

include/llvm/IR/IntrinsicsX86.td

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -658,18 +658,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
658658
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
659659
def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
660660
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
661-
def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">,
662-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
663661

664662
def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
665663
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
666-
def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">,
667-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
668664

669665
def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
670666
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
671-
def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">,
672-
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
673667
}
674668

675669
//===----------------------------------------------------------------------===//
@@ -1829,88 +1823,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
18291823
llvm_v8i32_ty], [IntrNoMem]>;
18301824
}
18311825

1832-
// Absolute value ops
1833-
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1834-
def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
1835-
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
1836-
def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
1837-
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
1838-
def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
1839-
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
1840-
1841-
def int_x86_avx512_mask_pabs_b_128 :
1842-
GCCBuiltin<"__builtin_ia32_pabsb128_mask">,
1843-
Intrinsic<[llvm_v16i8_ty],
1844-
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
1845-
[IntrNoMem]>;
1846-
1847-
def int_x86_avx512_mask_pabs_b_256 :
1848-
GCCBuiltin<"__builtin_ia32_pabsb256_mask">,
1849-
Intrinsic<[llvm_v32i8_ty],
1850-
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
1851-
[IntrNoMem]>;
1852-
1853-
def int_x86_avx512_mask_pabs_b_512 :
1854-
GCCBuiltin<"__builtin_ia32_pabsb512_mask">,
1855-
Intrinsic<[llvm_v64i8_ty],
1856-
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
1857-
[IntrNoMem]>;
1858-
1859-
def int_x86_avx512_mask_pabs_d_128 :
1860-
GCCBuiltin<"__builtin_ia32_pabsd128_mask">,
1861-
Intrinsic<[llvm_v4i32_ty],
1862-
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
1863-
[IntrNoMem]>;
1864-
1865-
def int_x86_avx512_mask_pabs_d_256 :
1866-
GCCBuiltin<"__builtin_ia32_pabsd256_mask">,
1867-
Intrinsic<[llvm_v8i32_ty],
1868-
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
1869-
[IntrNoMem]>;
1870-
1871-
def int_x86_avx512_mask_pabs_d_512 :
1872-
GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
1873-
Intrinsic<[llvm_v16i32_ty],
1874-
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
1875-
[IntrNoMem]>;
1876-
1877-
def int_x86_avx512_mask_pabs_q_128 :
1878-
GCCBuiltin<"__builtin_ia32_pabsq128_mask">,
1879-
Intrinsic<[llvm_v2i64_ty],
1880-
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
1881-
[IntrNoMem]>;
1882-
1883-
def int_x86_avx512_mask_pabs_q_256 :
1884-
GCCBuiltin<"__builtin_ia32_pabsq256_mask">,
1885-
Intrinsic<[llvm_v4i64_ty],
1886-
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
1887-
[IntrNoMem]>;
1888-
1889-
def int_x86_avx512_mask_pabs_q_512 :
1890-
GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
1891-
Intrinsic<[llvm_v8i64_ty],
1892-
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
1893-
[IntrNoMem]>;
1894-
1895-
def int_x86_avx512_mask_pabs_w_128 :
1896-
GCCBuiltin<"__builtin_ia32_pabsw128_mask">,
1897-
Intrinsic<[llvm_v8i16_ty],
1898-
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
1899-
[IntrNoMem]>;
1900-
1901-
def int_x86_avx512_mask_pabs_w_256 :
1902-
GCCBuiltin<"__builtin_ia32_pabsw256_mask">,
1903-
Intrinsic<[llvm_v16i16_ty],
1904-
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
1905-
[IntrNoMem]>;
1906-
1907-
def int_x86_avx512_mask_pabs_w_512 :
1908-
GCCBuiltin<"__builtin_ia32_pabsw512_mask">,
1909-
Intrinsic<[llvm_v32i16_ty],
1910-
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
1911-
[IntrNoMem]>;
1912-
}
1913-
19141826
// Horizontal arithmetic ops
19151827
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
19161828
def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,

lib/IR/AutoUpgrade.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,12 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
7272
// like to use this information to remove upgrade code for some older
7373
// intrinsics. It is currently undecided how we will determine that future
7474
// point.
75-
if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
75+
if (Name=="ssse3.pabs.b.128" || // Added in 6.0
76+
Name=="ssse3.pabs.w.128" || // Added in 6.0
77+
Name=="ssse3.pabs.d.128" || // Added in 6.0
78+
Name.startswith("avx2.pabs.") || // Added in 6.0
79+
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
80+
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
7681
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
7782
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
7883
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
@@ -793,6 +798,20 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
793798
return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
794799
}
795800

801+
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { // Emits an icmp/neg/select sequence computing |operand 0| of CI and returns the resulting value.
802+
Value *Op0 = CI.getArgOperand(0); // the value whose absolute value is taken
803+
llvm::Type *Ty = Op0->getType();
804+
Value *Zero = llvm::Constant::getNullValue(Ty); // zero of the operand's own type
805+
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); // signed test: Op0 > 0
806+
Value *Neg = Builder.CreateNeg(Op0);
807+
Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); // Op0 where the test holds, otherwise -Op0
808+
809+
if (CI.getNumArgOperands() == 3) // three-operand call; NOTE(review): presumably the avx512.mask.pabs.* form (source, passthru, mask) -- confirm
810+
Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); // presumably operand 2 is the mask choosing between Res and operand 1 -- verify against EmitX86Select
811+
812+
return Res;
813+
}
814+
796815
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
797816
ICmpInst::Predicate Pred) {
798817
Value *Op0 = CI.getArgOperand(0);
@@ -1056,6 +1075,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
10561075
} else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
10571076
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
10581077
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1078+
} else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1079+
Name == "ssse3.pabs.w.128" ||
1080+
Name == "ssse3.pabs.d.128" ||
1081+
Name.startswith("avx2.pabs") ||
1082+
Name.startswith("avx512.mask.pabs"))) {
1083+
Rep = upgradeAbs(Builder, *CI);
10591084
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
10601085
Name == "sse2.pmaxs.w" ||
10611086
Name == "sse41.pmaxsd" ||

lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -372,9 +372,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
372372
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
373373
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
374374
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
375-
X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
376-
X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
377-
X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
378375
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
379376
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
380377
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -792,18 +789,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
792789
X86ISD::FMULS_RND, 0),
793790
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
794791
X86ISD::FMULS_RND, 0),
795-
X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
796-
X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
797-
X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
798-
X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
799-
X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
800-
X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
801-
X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
802-
X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
803-
X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
804-
X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
805-
X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
806-
X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
807792
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
808793
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
809794
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1629,9 +1614,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
16291614
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
16301615
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
16311616
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
1632-
X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
1633-
X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
1634-
X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
16351617
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
16361618
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
16371619
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),

test/CodeGen/X86/avx2-intrinsics-fast-isel.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) {
1515
; X64-NEXT: vpabsb %ymm0, %ymm0
1616
; X64-NEXT: retq
1717
%arg = bitcast <4 x i64> %a0 to <32 x i8>
18-
%call = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %arg)
19-
%res = bitcast <32 x i8> %call to <4 x i64>
18+
%sub = sub <32 x i8> zeroinitializer, %arg
19+
%cmp = icmp sgt <32 x i8> %arg, zeroinitializer
20+
%sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub
21+
%res = bitcast <32 x i8> %sel to <4 x i64>
2022
ret <4 x i64> %res
2123
}
2224
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
@@ -32,8 +34,10 @@ define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) {
3234
; X64-NEXT: vpabsw %ymm0, %ymm0
3335
; X64-NEXT: retq
3436
%arg = bitcast <4 x i64> %a0 to <16 x i16>
35-
%call = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %arg)
36-
%res = bitcast <16 x i16> %call to <4 x i64>
37+
%sub = sub <16 x i16> zeroinitializer, %arg
38+
%cmp = icmp sgt <16 x i16> %arg, zeroinitializer
39+
%sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub
40+
%res = bitcast <16 x i16> %sel to <4 x i64>
3741
ret <4 x i64> %res
3842
}
3943
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
@@ -49,8 +53,10 @@ define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) {
4953
; X64-NEXT: vpabsd %ymm0, %ymm0
5054
; X64-NEXT: retq
5155
%arg = bitcast <4 x i64> %a0 to <8 x i32>
52-
%call = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %arg)
53-
%res = bitcast <8 x i32> %call to <4 x i64>
56+
%sub = sub <8 x i32> zeroinitializer, %arg
57+
%cmp = icmp sgt <8 x i32> %arg, zeroinitializer
58+
%sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub
59+
%res = bitcast <8 x i32> %sel to <4 x i64>
5460
ret <4 x i64> %res
5561
}
5662
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,3 +534,34 @@ define <16 x i16> @mm256_avg_epu16(<16 x i16> %a0, <16 x i16> %a1) {
534534
}
535535
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
536536

537+
define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
538+
; AVX2-LABEL: test_x86_avx2_pabs_b:
539+
; AVX2: ## BB#0:
540+
; AVX2-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
541+
; AVX2-NEXT: retl ## encoding: [0xc3]
542+
%res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
543+
ret <32 x i8> %res
544+
}
545+
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
546+
547+
define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
548+
; AVX2-LABEL: test_x86_avx2_pabs_d:
549+
; AVX2: ## BB#0:
550+
; AVX2-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
551+
; AVX2-NEXT: retl ## encoding: [0xc3]
552+
%res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
553+
ret <8 x i32> %res
554+
}
555+
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
556+
557+
558+
define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
559+
; AVX2-LABEL: test_x86_avx2_pabs_w:
560+
; AVX2: ## BB#0:
561+
; AVX2-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
562+
; AVX2-NEXT: retl ## encoding: [0xc3]
563+
%res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
564+
ret <16 x i16> %res
565+
}
566+
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
567+

test/CodeGen/X86/avx2-intrinsics-x86.ll

Lines changed: 15 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -589,55 +589,6 @@ define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
589589
}
590590
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
591591

592-
593-
define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
594-
; AVX2-LABEL: test_x86_avx2_pabs_b:
595-
; AVX2: ## BB#0:
596-
; AVX2-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
597-
; AVX2-NEXT: retl ## encoding: [0xc3]
598-
;
599-
; AVX512VL-LABEL: test_x86_avx2_pabs_b:
600-
; AVX512VL: ## BB#0:
601-
; AVX512VL-NEXT: vpabsb %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
602-
; AVX512VL-NEXT: retl ## encoding: [0xc3]
603-
%res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
604-
ret <32 x i8> %res
605-
}
606-
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
607-
608-
609-
define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
610-
; AVX2-LABEL: test_x86_avx2_pabs_d:
611-
; AVX2: ## BB#0:
612-
; AVX2-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
613-
; AVX2-NEXT: retl ## encoding: [0xc3]
614-
;
615-
; AVX512VL-LABEL: test_x86_avx2_pabs_d:
616-
; AVX512VL: ## BB#0:
617-
; AVX512VL-NEXT: vpabsd %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
618-
; AVX512VL-NEXT: retl ## encoding: [0xc3]
619-
%res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
620-
ret <8 x i32> %res
621-
}
622-
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
623-
624-
625-
define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
626-
; AVX2-LABEL: test_x86_avx2_pabs_w:
627-
; AVX2: ## BB#0:
628-
; AVX2-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
629-
; AVX2-NEXT: retl ## encoding: [0xc3]
630-
;
631-
; AVX512VL-LABEL: test_x86_avx2_pabs_w:
632-
; AVX512VL: ## BB#0:
633-
; AVX512VL-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
634-
; AVX512VL-NEXT: retl ## encoding: [0xc3]
635-
%res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
636-
ret <16 x i16> %res
637-
}
638-
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
639-
640-
641592
define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
642593
; CHECK-LABEL: test_x86_avx2_phadd_d:
643594
; CHECK: ## BB#0:
@@ -1308,18 +1259,18 @@ define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
13081259
; AVX2: ## BB#0:
13091260
; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
13101261
; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
1311-
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
1312-
; AVX2-NEXT: vpsravd LCPI88_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
1313-
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
1262+
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
1263+
; AVX2-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
1264+
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
13141265
; AVX2-NEXT: retl ## encoding: [0xc3]
13151266
;
13161267
; AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
13171268
; AVX512VL: ## BB#0:
1318-
; AVX512VL-NEXT: vmovdqa LCPI88_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
1269+
; AVX512VL-NEXT: vmovdqa LCPI85_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
13191270
; AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
1320-
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
1321-
; AVX512VL-NEXT: vpsravd LCPI88_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
1322-
; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
1271+
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
1272+
; AVX512VL-NEXT: vpsravd LCPI85_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
1273+
; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
13231274
; AVX512VL-NEXT: retl ## encoding: [0xc3]
13241275
%res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
13251276
ret <4 x i32> %res
@@ -1345,18 +1296,18 @@ define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1)
13451296
; AVX2: ## BB#0:
13461297
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
13471298
; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
1348-
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4
1349-
; AVX2-NEXT: vpsravd LCPI90_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
1350-
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4
1299+
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4
1300+
; AVX2-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
1301+
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4
13511302
; AVX2-NEXT: retl ## encoding: [0xc3]
13521303
;
13531304
; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
13541305
; AVX512VL: ## BB#0:
1355-
; AVX512VL-NEXT: vmovdqa LCPI90_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
1306+
; AVX512VL-NEXT: vmovdqa LCPI87_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
13561307
; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
1357-
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4
1358-
; AVX512VL-NEXT: vpsravd LCPI90_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
1359-
; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4
1308+
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4
1309+
; AVX512VL-NEXT: vpsravd LCPI87_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
1310+
; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4
13601311
; AVX512VL-NEXT: retl ## encoding: [0xc3]
13611312
%res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
13621313
ret <8 x i32> %res
@@ -1418,7 +1369,7 @@ declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
14181369
define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
14191370
; CHECK-LABEL: test_x86_avx2_gather_d_ps:
14201371
; CHECK: ## BB#0:
1421-
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
1372+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
14221373
; CHECK-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
14231374
; CHECK-NEXT: retl ## encoding: [0xc3]
14241375
%res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,

0 commit comments

Comments
 (0)