Skip to content

Commit bd0f271

Browse files
committed
[ARM][MVE] Add intrinsics for immediate shifts. (reland)
This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to *and including* the bit size of the vector lane. But LLVM IR treats shifting right by the full size of the lane as undefined behavior, and feels free to replace the `lshr` with an `undef` halfway through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen, MarkMurrayARM Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065
1 parent c8b74ee commit bd0f271

File tree

8 files changed

+1274
-54
lines changed

8 files changed

+1274
-54
lines changed

clang/include/clang/Basic/arm_mve.td

+27
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,33 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
609609
defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
610610
defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
611611

612+
// Defines the two predicated variants (`_m_n` and `_x_n`) of a vector shift
// by an immediate.
//   immtype      - Immediate type of the shift-count argument $sh.
//   predIntrName - name handed to IRInt to pick the predicated IR intrinsic,
//                  which is instantiated on [Vector, Predicate].
//   unsignedFlag - optional list of extra dag operands spliced in between
//                  ($v, $sh) and the predicate; empty by default.
// Both variants build the call (predIntr $v, $sh, <unsignedFlag...>, $pred, X)
// by concatenating three dags with !con; they differ only in the operand X.
multiclass PredicatedImmediateVectorShift<
613+
Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> {
614+
foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in {
615+
// `_m_n`: takes an explicit $inactive vector and passes it as the final
// operand of the intrinsic call.
def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
616+
immtype:$sh, Predicate:$pred),
617+
!con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
618+
(predIntr $pred, $inactive))>;
619+
// `_x_n`: has no $inactive argument; (undef Vector) is passed in its place.
def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh,
620+
Predicate:$pred),
621+
!con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
622+
(predIntr $pred, (undef Vector)))>;
623+
}
624+
}
625+
626+
// Immediate vector shifts, instantiated for each type in T.Int.
let params = T.Int in {
627+
// Unpredicated shift left: an IR `shl` of $v by a vector splat of the
// immediate. The count uses imm_0toNm1 (0 to N-1 inclusive).
// NOTE(review): (Scalar $sh) presumably converts the immediate to the lane's
// scalar type before splatting - confirm against the emitter.
def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
628+
(shl $v, (splat (Scalar $sh)))>;
629+
defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">;
630+
631+
// NOTE(review): pnt = PNT_NType presumably controls how the `_n` suffix is
// treated in the polymorphic names - confirm in arm_mve_defs.td.
let pnt = PNT_NType in {
632+
// Unpredicated shift right. The count uses imm_1toN, so it may equal the
// lane size; the shift is therefore emitted through the immshr helper and
// is passed an (unsignedflag Scalar) operand alongside the shift count.
def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
633+
(immshr $v, $sh, (unsignedflag Scalar))>;
634+
// The predicated forms pass the same unsigned flag to the IR intrinsic.
defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated",
635+
[(unsignedflag Scalar)]>;
636+
}
637+
}
638+
612639
// Base class for the scalar shift intrinsics.
613640
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
614641
Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {

clang/include/clang/Basic/arm_mve_defs.td

+6-2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">;
6666
def sub: IRBuilder<"CreateSub">;
6767
def shl: IRBuilder<"CreateShl">;
6868
def lshr: IRBuilder<"CreateLShr">;
69+
// Shift right by an immediate, emitted by calling the codegen helper function
// "MVEImmediateShr" rather than a single IRBuilder method. special_params
// tags operands 1 and 2 with the C++ types `unsigned` and `bool`.
// NOTE(review): presumably this makes the emitter pass them as bare integers
// instead of llvm::Value pointers - confirm against MveEmitter.
def immshr: CGHelperFn<"MVEImmediateShr"> {
70+
let special_params = [IRBuilderIntParam<1, "unsigned">,
71+
IRBuilderIntParam<2, "bool">];
72+
}
6973
def fadd: IRBuilder<"CreateFAdd">;
7074
def fmul: IRBuilder<"CreateFMul">;
7175
def fsub: IRBuilder<"CreateFSub">;
@@ -318,8 +322,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
318322
//
319323
// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
320324
// inclusive.
321-
def imm_1toN : Immediate<u32, IB_EltBit<1>>;
322-
def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
325+
def imm_1toN : Immediate<sint, IB_EltBit<1>>;
326+
def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
323327

324328
// imm_lane has to be the index of a vector lane in the main vector type, i.e
325329
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)

clang/lib/CodeGen/CGBuiltin.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -6916,13 +6916,43 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
69166916
}
69176917
}
69186918

6919+
template<typename Integer>
6920+
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
6921+
llvm::APSInt IntVal;
6922+
bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
6923+
assert(IsConst && "Sema should have checked this was a constant");
6924+
(void)IsConst;
6925+
return IntVal.getExtValue();
6926+
}
6927+
69196928
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
                                     llvm::Type *T, bool Unsigned) {
  // Extend V to type T, picking zero- or sign-extension from a boolean flag.
  // The Tablegen-constructed ARM MVE builtin codegen calls this because it
  // finds it convenient to describe signedness as a flag.
  if (Unsigned)
    return Builder.CreateZExt(V, T);
  return Builder.CreateSExt(V, T);
}
69256934

6935+
static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
                                    uint32_t Shift, bool Unsigned) {
  // Emit an MVE integer right shift of vector V by the immediate Shift,
  // logical when Unsigned is set and arithmetic otherwise.
  //
  // MVE permits a shift count equal to the lane size, but the corresponding
  // LLVM IR shift would be undefined behavior, so that one case has to be
  // rewritten into IR that is well defined.
  llvm::Type *VecTy = V->getType();
  const unsigned ElementWidth =
      VecTy->getVectorElementType()->getPrimitiveSizeInBits();
  const bool FullWidthShift = (Shift == ElementWidth);

  if (Unsigned) {
    // Logically shifting out every bit of the lane always leaves zero, so
    // the result is simply the zero vector.
    if (FullWidthShift)
      return llvm::Constant::getNullValue(VecTy);
    return Builder.CreateLShr(V, Shift);
  }

  // Arithmetically shifting by the full lane size gives the same result as
  // shifting by one bit fewer, which is a legal IR shift amount.
  const uint32_t Amount = FullWidthShift ? Shift - 1 : Shift;
  return Builder.CreateAShr(V, Amount);
}
6955+
69266956
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
69276957
// MVE-specific helper function for a vector splat, which infers the element
69286958
// count of the output vector by knowing that MVE vectors are all 128 bits

0 commit comments

Comments
 (0)