Skip to content

Commit 3e948eb

Browse files
authored
[AArch64][NEON] Add intrinsics for LUTI (#96883)
This patch adds intrinsics for the NEON LUTI2 and LUTI4 instructions, as specified in the [ACLE proposal](ARM-software/acle#324).
1 parent 59093ca commit 3e948eb

File tree

7 files changed

+949
-7
lines changed

7 files changed

+949
-7
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,3 +2096,22 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
20962096
def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
20972097
def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
20982098
}
2099+
2100+
// Lookup table read with 2-bit/4-bit indices
// Every def in the outer group is guarded by the "lut" target feature; the
// nested group further down requires "lut,bf16" and holds the VLUTI*_BF* defs.
// NOTE(review): the two strings after each intrinsic name are presumably the
// prototype and type-list specifiers consumed by the NEON emitter - confirm
// against arm_neon_incl.td.
2101+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
2102+
def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc">;
2103+
def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc">;
2104+
def VLUTI2_H : SInst<"vluti2_lane", "Q.(<qU)I", "sUsPshQsQUsQPsQh">;
2105+
def VLUTI2_H_Q : SInst<"vluti2_laneq", "Q.(<QU)I", "sUsPshQsQUsQPsQh">;
2106+
def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc">;
2107+
def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPc">;
2108+
def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "QsQUsQPsQh">;
2109+
def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "QsQUsQPsQh">;
2110+
2111+
let ArchGuard = "defined(__aarch64__)", TargetGuard= "lut,bf16" in { // same ArchGuard as the outer group; only TargetGuard differs
2112+
def VLUTI2_BF : SInst<"vluti2_lane", "Q.(<qU)I", "bQb">;
2113+
def VLUTI2_BF_Q : SInst<"vluti2_laneq", "Q.(<QU)I", "bQb">;
2114+
def VLUTI4_BF_X2 : SInst<"vluti4_lane_x2", ".2(<qU)I", "Qb">;
2115+
def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb">;
2116+
} // end of the TargetGuard "lut,bf16" group
2117+
} // end of the TargetGuard "lut" group

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13481,6 +13481,95 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1348113481
Int = Intrinsic::aarch64_neon_suqadd;
1348213482
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
1348313483
}
13484+
13485+
// vluti2_laneq builtins (names without the "q" after "vluti2"): all eight
// element-type variants map to aarch64_neon_vluti2_laneq, which is looked up
// with two overload types. Int, Ty, Type and Ops are declared outside the
// lines visible in this hunk.
case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13486+
case NEON::BI__builtin_neon_vluti2_laneq_f16:
13487+
case NEON::BI__builtin_neon_vluti2_laneq_p16:
13488+
case NEON::BI__builtin_neon_vluti2_laneq_p8:
13489+
case NEON::BI__builtin_neon_vluti2_laneq_s16:
13490+
case NEON::BI__builtin_neon_vluti2_laneq_s8:
13491+
case NEON::BI__builtin_neon_vluti2_laneq_u16:
13492+
case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13493+
Int = Intrinsic::aarch64_neon_vluti2_laneq;
13494+
llvm::Type *Tys[2];
13495+
Tys[0] = Ty;
13496+
// Second overload type: a non-quad NEON vector with Type's element type.
// NOTE(review): presumably the type of the first vector operand - confirm.
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13497+
/*isQuad*/ false));
13498+
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13499+
}
13500+
// vluti2q_laneq builtins: same intrinsic (aarch64_neon_vluti2_laneq) and same
// call name string ("vluti2_laneq") as the vluti2_laneq group; the only
// difference is that the second overload type is built with isQuad = true.
case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13501+
case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13502+
case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13503+
case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13504+
case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13505+
case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13506+
case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13507+
case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13508+
Int = Intrinsic::aarch64_neon_vluti2_laneq;
13509+
llvm::Type *Tys[2];
13510+
Tys[0] = Ty;
13511+
// Second overload type: a quad NEON vector with Type's element type.
// NOTE(review): presumably the type of the first vector operand - confirm.
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13512+
/*isQuad*/ true));
13513+
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13514+
}
13515+
// vluti2_lane builtins (names without the "q" after "vluti2"): all eight
// element-type variants map to aarch64_neon_vluti2_lane, which is looked up
// with two overload types. Int, Ty, Type and Ops are declared outside the
// lines visible in this hunk.
case NEON::BI__builtin_neon_vluti2_lane_bf16:
13516+
case NEON::BI__builtin_neon_vluti2_lane_f16:
13517+
case NEON::BI__builtin_neon_vluti2_lane_p16:
13518+
case NEON::BI__builtin_neon_vluti2_lane_p8:
13519+
case NEON::BI__builtin_neon_vluti2_lane_s16:
13520+
case NEON::BI__builtin_neon_vluti2_lane_s8:
13521+
case NEON::BI__builtin_neon_vluti2_lane_u16:
13522+
case NEON::BI__builtin_neon_vluti2_lane_u8: {
13523+
Int = Intrinsic::aarch64_neon_vluti2_lane;
13524+
llvm::Type *Tys[2];
13525+
Tys[0] = Ty;
13526+
// Second overload type: a non-quad NEON vector with Type's element type.
// NOTE(review): presumably the type of the first vector operand - confirm.
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13527+
/*isQuad*/ false));
13528+
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13529+
}
13530+
// vluti2q_lane builtins: same intrinsic (aarch64_neon_vluti2_lane) and same
// call name string ("vluti2_lane") as the vluti2_lane group; the only
// difference is that the second overload type is built with isQuad = true.
case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13531+
case NEON::BI__builtin_neon_vluti2q_lane_f16:
13532+
case NEON::BI__builtin_neon_vluti2q_lane_p16:
13533+
case NEON::BI__builtin_neon_vluti2q_lane_p8:
13534+
case NEON::BI__builtin_neon_vluti2q_lane_s16:
13535+
case NEON::BI__builtin_neon_vluti2q_lane_s8:
13536+
case NEON::BI__builtin_neon_vluti2q_lane_u16:
13537+
case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13538+
Int = Intrinsic::aarch64_neon_vluti2_lane;
13539+
llvm::Type *Tys[2];
13540+
Tys[0] = Ty;
13541+
// Second overload type: a quad NEON vector with Type's element type.
// NOTE(review): presumably the type of the first vector operand - confirm.
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13542+
/*isQuad*/ true));
13543+
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13544+
}
13545+
// vluti4q_lane builtins for the 8-bit element variants (p8/s8/u8): forward Ops
// to aarch64_neon_vluti4q_lane, looked up with the single overload type Ty.
case NEON::BI__builtin_neon_vluti4q_lane_p8:
13546+
case NEON::BI__builtin_neon_vluti4q_lane_s8:
13547+
case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13548+
Int = Intrinsic::aarch64_neon_vluti4q_lane;
13549+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13550+
}
13551+
// vluti4q_laneq builtins for the 8-bit element variants (p8/s8/u8): forward
// Ops to aarch64_neon_vluti4q_laneq, looked up with the single overload type Ty.
case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13552+
case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13553+
case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13554+
Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13555+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13556+
}
13557+
// vluti4q_lane_x2 builtins for the 16-bit element variants
// (bf16/f16/p16/s16/u16): forward Ops to aarch64_neon_vluti4q_lane_x2, looked
// up with the single overload type Ty.
case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13558+
case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13559+
case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13560+
case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13561+
case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
13562+
Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
13563+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
13564+
}
13565+
// vluti4q_laneq_x2 builtins for the 16-bit element variants
// (bf16/f16/p16/s16/u16): forward Ops to aarch64_neon_vluti4q_laneq_x2, looked
// up with the single overload type Ty.
case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
13566+
case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
13567+
case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
13568+
case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
13569+
case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
13570+
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
13571+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
13572+
}
1348413573
}
1348513574
}
1348613575

0 commit comments

Comments
 (0)