Skip to content

Commit 6fc0312

Browse files
authored
[Clang][AArch64] Add fp8 variants for untyped NEON intrinsics (#128019)
This patch adds fp8 variants to existing intrinsics whose operation doesn't depend on the arguments being of a specific type. It also changes the in-memory representation of the mfloat8 type from `i8` to `<1 x i8>`.
1 parent 43db72d commit 6fc0312

File tree

11 files changed

+1307
-112
lines changed

11 files changed

+1307
-112
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,10 @@ def OP_CVT_F32_BF16
279279

280280
// Splat operation - performs a range-checked splat over a vector
281281
def SPLAT : WInst<"splat_lane", ".(!q)I",
282-
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
282+
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm",
283283
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
284284
def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
285-
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
285+
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm",
286286
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
287287

288288
let TargetGuard = "bf16,neon" in {
@@ -547,40 +547,40 @@ def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
547547
// E.3.16 Extract lanes from a vector
548548
let InstName = "vmov" in
549549
def VGET_LANE : IInst<"vget_lane", "1.I",
550-
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
550+
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm",
551551
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
552552

553553
////////////////////////////////////////////////////////////////////////////////
554554
// E.3.17 Set lanes within a vector
555555
let InstName = "vmov" in
556556
def VSET_LANE : IInst<"vset_lane", ".1.I",
557-
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
557+
"UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm",
558558
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
559559

560560
////////////////////////////////////////////////////////////////////////////////
561561
// E.3.18 Initialize a vector from bit pattern
562-
def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> {
562+
def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPslm", OP_CAST> {
563563
let BigEndianSafe = 1;
564564
}
565565

566566
////////////////////////////////////////////////////////////////////////////////
567567
// E.3.19 Set all lanes to same value
568568
let InstName = "vmov" in {
569569
def VDUP_N : WOpInst<"vdup_n", ".1",
570-
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
570+
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
571571
OP_DUP>;
572572
def VMOV_N : WOpInst<"vmov_n", ".1",
573-
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
573+
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
574574
OP_DUP>;
575575
}
576576
let InstName = "" in
577577
def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
578-
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
578+
"UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm",
579579
OP_DUP_LN>;
580580

581581
////////////////////////////////////////////////////////////////////////////////
582582
// E.3.20 Combining vectors
583-
def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;
583+
def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPsm", OP_CONC>;
584584

585585
////////////////////////////////////////////////////////////////////////////////
586586
// E.3.21 Splitting vectors
@@ -589,8 +589,8 @@ def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;
589589
// versions of these intrinsics in both AArch32 and AArch64 architectures. See
590590
// D45668 for more details.
591591
let InstName = "vmov" in {
592-
def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>;
593-
def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>;
592+
def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPsm", OP_HI>;
593+
def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPsm", OP_LO>;
594594
}
595595

596596
////////////////////////////////////////////////////////////////////////////////
@@ -619,16 +619,16 @@ def VQMOVUN : SInst<"vqmovun", "(<U)Q", "sil">;
619619
////////////////////////////////////////////////////////////////////////////////
620620
// E.3.23-24 Table lookup, Extended table lookup
621621
let InstName = "vtbl" in {
622-
def VTBL1 : WInst<"vtbl1", "..p", "UccPc">;
623-
def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">;
624-
def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">;
625-
def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">;
622+
def VTBL1 : WInst<"vtbl1", "..p", "UccPcm">;
623+
def VTBL2 : WInst<"vtbl2", ".2p", "UccPcm">;
624+
def VTBL3 : WInst<"vtbl3", ".3p", "UccPcm">;
625+
def VTBL4 : WInst<"vtbl4", ".4p", "UccPcm">;
626626
}
627627
let InstName = "vtbx" in {
628-
def VTBX1 : WInst<"vtbx1", "...p", "UccPc">;
629-
def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">;
630-
def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">;
631-
def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">;
628+
def VTBX1 : WInst<"vtbx1", "...p", "UccPcm">;
629+
def VTBX2 : WInst<"vtbx2", "..2p", "UccPcm">;
630+
def VTBX3 : WInst<"vtbx3", "..3p", "UccPcm">;
631+
def VTBX4 : WInst<"vtbx4", "..4p", "UccPcm">;
632632
}
633633

634634
////////////////////////////////////////////////////////////////////////////////
@@ -677,15 +677,15 @@ def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
677677
////////////////////////////////////////////////////////////////////////////////
678678
// E.3.26 Vector Extract
679679
def VEXT : WInst<"vext", "...I",
680-
"cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
680+
"cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQfmQm",
681681
[ImmCheck<2, ImmCheckLaneIndex, 0>]>;
682682

683683
////////////////////////////////////////////////////////////////////////////////
684684
// E.3.27 Reverse vector elements
685-
def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf",
685+
def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQfmQm",
686686
OP_REV64>;
687-
def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>;
688-
def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>;
687+
def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPsmQm", OP_REV32>;
688+
def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPcmQm", OP_REV16>;
689689

690690
////////////////////////////////////////////////////////////////////////////////
691691
// E.3.28 Other single operand arithmetic
@@ -709,13 +709,13 @@ def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;
709709
def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>;
710710
let isHiddenLInst = 1 in
711711
def VBSL : SInst<"vbsl", ".U..",
712-
"csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
712+
"csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsmQm">;
713713

714714
////////////////////////////////////////////////////////////////////////////////
715715
// E.3.30 Transposition operations
716-
def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
717-
def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
718-
def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
716+
def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;
717+
def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;
718+
def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">;
719719

720720
////////////////////////////////////////////////////////////////////////////////
721721

@@ -1028,19 +1028,19 @@ def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
10281028
def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
10291029
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
10301030
def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
1031-
"csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
1031+
"csilUcUsUiUlPcPsPlfdm", OP_COPY_LN>;
10321032
def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
1033-
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
1033+
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>;
10341034
def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
1035-
"csilPcPsPlUcUsUiUlfd", OP_COPY_LN>;
1035+
"csilPcPsPlUcUsUiUlfdm", OP_COPY_LN>;
10361036
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
1037-
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
1037+
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>;
10381038

10391039
////////////////////////////////////////////////////////////////////////////////
10401040
// Set all lanes to same value
10411041
def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
10421042
def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
1043-
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
1043+
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPlmQm",
10441044
OP_DUP_LN>;
10451045
def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
10461046
def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
@@ -1266,31 +1266,31 @@ def FMINNM_S64 : SInst<"vminnm", "...", "dQd">;
12661266
////////////////////////////////////////////////////////////////////////////////
12671267
// Permutation
12681268
def VTRN1 : SOpInst<"vtrn1", "...",
1269-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;
1269+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN1>;
12701270
def VZIP1 : SOpInst<"vzip1", "...",
1271-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>;
1271+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP1>;
12721272
def VUZP1 : SOpInst<"vuzp1", "...",
1273-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>;
1273+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP1>;
12741274
def VTRN2 : SOpInst<"vtrn2", "...",
1275-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>;
1275+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN2>;
12761276
def VZIP2 : SOpInst<"vzip2", "...",
1277-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>;
1277+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP2>;
12781278
def VUZP2 : SOpInst<"vuzp2", "...",
1279-
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>;
1279+
"csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP2>;
12801280

12811281
////////////////////////////////////////////////////////////////////////////////
12821282
// Table lookup
12831283
let InstName = "vtbl" in {
1284-
def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">;
1285-
def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">;
1286-
def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">;
1287-
def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">;
1284+
def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPcmQm">;
1285+
def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPcmQm">;
1286+
def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPcmQm">;
1287+
def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPcmQm">;
12881288
}
12891289
let InstName = "vtbx" in {
1290-
def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">;
1291-
def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">;
1292-
def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">;
1293-
def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">;
1290+
def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPcmQm">;
1291+
def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPcmQm">;
1292+
def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPcmQm">;
1293+
def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPcmQm">;
12941294
}
12951295

12961296
////////////////////////////////////////////////////////////////////////////////
@@ -1654,9 +1654,9 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_Q
16541654
def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>;
16551655
} // TargetGuard = "v8.1a"
16561656

1657-
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
1657+
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm",
16581658
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
1659-
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
1659+
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm",
16601660
[ImmCheck<1, ImmCheckLaneIndex, 0>]>;
16611661

16621662
} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
@@ -2090,17 +2090,17 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
20902090

20912091
// Lookup table read with 2-bit/4-bit indices
20922092
let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
2093-
def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc",
2093+
def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcmQcQUcQPcQm",
20942094
[ImmCheck<2, ImmCheck0_1>]>;
2095-
def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc",
2095+
def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcmQcQUcQPcQm",
20962096
[ImmCheck<2, ImmCheck0_3>]>;
20972097
def VLUTI2_H : SInst<"vluti2_lane", "Q.(<qU)I", "sUsPshQsQUsQPsQh",
20982098
[ImmCheck<2, ImmCheck0_3>]>;
20992099
def VLUTI2_H_Q : SInst<"vluti2_laneq", "Q.(<QU)I", "sUsPshQsQUsQPsQh",
21002100
[ImmCheck<2, ImmCheck0_7>]>;
2101-
def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc",
2101+
def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPcQm",
21022102
[ImmCheck<2, ImmCheck0_0>]>;
2103-
def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPc",
2103+
def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPcQm",
21042104
[ImmCheck<2, ImmCheck0_1>]>;
21052105
def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh",
21062106
[ImmCheck<3, ImmCheck0_1>]>;

clang/lib/AST/Type.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,6 +2833,11 @@ static bool isTriviallyCopyableTypeImpl(const QualType &type,
28332833
if (CanonicalType->isScalarType() || CanonicalType->isVectorType())
28342834
return true;
28352835

2836+
// Mfloat8 type is a special case as it is not scalar, but is still trivially
2837+
// copyable.
2838+
if (CanonicalType->isMFloat8Type())
2839+
return true;
2840+
28362841
if (const auto *RT = CanonicalType->getAs<RecordType>()) {
28372842
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
28382843
if (IsCopyConstructible) {

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,6 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
108108
MT->getNumRows() * MT->getNumColumns());
109109
}
110110

111-
if (T->isMFloat8Type())
112-
return llvm::Type::getInt8Ty(getLLVMContext());
113-
114111
llvm::Type *R = ConvertType(T);
115112

116113
// Check for the boolean vector case.

0 commit comments

Comments
 (0)