Skip to content

Commit a90026c

Browse files
committed
Divide vectors that surpass 4 element limit
1 parent 3c74bfe commit a90026c

File tree

3 files changed

+230
-27
lines changed

3 files changed

+230
-27
lines changed

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ def CountBits : DXILOp<31, unaryBits> {
621621
def FirstbitLo : DXILOp<32, unaryBits> {
622622
let Doc = "Returns the location of the first set bit starting from "
623623
"the lowest order bit and working upward.";
624-
let LLVMIntrinsic = int_dx_firstbitlow;
624+
let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
625625
let arguments = [OverloadTy];
626626
let result = Int32Ty;
627627
let overloads =

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 112 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
108108
unsigned Opcode) const;
109109

110110
bool selectFirstBitSet64(Register ResVReg, const SPIRVType *ResType,
111-
MachineInstr &I, unsigned BitSetOpcode,
112-
bool SwapPrimarySide) const;
111+
MachineInstr &I, Register SrcReg,
112+
unsigned BitSetOpcode, bool SwapPrimarySide) const;
113113

114114
bool selectGlobalValue(Register ResVReg, MachineInstr &I,
115115
const MachineInstr *Init = nullptr) const;
@@ -3171,23 +3171,116 @@ bool SPIRVInstructionSelector::selectFirstBitSet32(Register ResVReg,
31713171
.constrainAllUses(TII, TRI, RBI);
31723172
}
31733173

3174-
bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
3175-
const SPIRVType *ResType,
3176-
MachineInstr &I,
3177-
unsigned BitSetOpcode,
3178-
bool SwapPrimarySide) const {
3179-
Register OpReg = I.getOperand(2).getReg();
3180-
3181-
// 1. Split int64 into 2 pieces using a bitcast
3174+
bool SPIRVInstructionSelector::selectFirstBitSet64(
3175+
Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
3176+
Register SrcReg, unsigned BitSetOpcode, bool SwapPrimarySide) const {
31823177
unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType);
31833178
SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType);
3179+
bool ZeroAsNull = STI.isOpenCLEnv();
3180+
Register ConstIntZero =
3181+
GR.getOrCreateConstInt(0, I, BaseType, TII, ZeroAsNull);
3182+
Register ConstIntOne =
3183+
GR.getOrCreateConstInt(1, I, BaseType, TII, ZeroAsNull);
3184+
3185+
// SPIRV doesn't support vectors with more than 4 components. Since the
3186+
// algorithm below converts i64 -> i32x2 and i64x4 -> i32x8, it can only
3187+
// operate on vectors with 2 or fewer components. When larger vectors are
3188+
// seen, split them, recurse, then recombine them.
3189+
if (ComponentCount > 2) {
3190+
unsigned LeftComponentCount = ComponentCount / 2;
3191+
unsigned RightComponentCount = ComponentCount - LeftComponentCount;
3192+
bool LeftIsVector = LeftComponentCount > 1;
3193+
3194+
// Split the SrcReg in half into 2 smaller vec registers
3195+
// (ie i64x4 -> i64x2, i64x2)
3196+
MachineIRBuilder MIRBuilder(I);
3197+
SPIRVType *OpType = GR.getOrCreateSPIRVIntegerType(64, MIRBuilder);
3198+
SPIRVType *LeftVecOpType;
3199+
SPIRVType *LeftVecResType;
3200+
if (LeftIsVector) {
3201+
LeftVecOpType =
3202+
GR.getOrCreateSPIRVVectorType(OpType, LeftComponentCount, MIRBuilder);
3203+
LeftVecResType = GR.getOrCreateSPIRVVectorType(
3204+
BaseType, LeftComponentCount, MIRBuilder);
3205+
} else {
3206+
LeftVecOpType = OpType;
3207+
LeftVecResType = BaseType;
3208+
}
3209+
3210+
SPIRVType *RightVecOpType =
3211+
GR.getOrCreateSPIRVVectorType(OpType, RightComponentCount, MIRBuilder);
3212+
SPIRVType *RightVecResType = GR.getOrCreateSPIRVVectorType(
3213+
BaseType, RightComponentCount, MIRBuilder);
3214+
3215+
Register LeftSideIn =
3216+
MRI->createVirtualRegister(GR.getRegClass(LeftVecOpType));
3217+
Register RightSideIn =
3218+
MRI->createVirtualRegister(GR.getRegClass(RightVecOpType));
3219+
3220+
bool Result;
3221+
3222+
if (LeftIsVector) {
3223+
auto MIB =
3224+
BuildMI(*I.getParent(), I, I.getDebugLoc(),
3225+
TII.get(SPIRV::OpVectorShuffle))
3226+
.addDef(LeftSideIn)
3227+
.addUse(GR.getSPIRVTypeID(LeftVecOpType))
3228+
.addUse(SrcReg)
3229+
// Per the spec, repeat the vector if only one vec is needed
3230+
.addUse(SrcReg);
3231+
3232+
for (unsigned J = 0; J < LeftComponentCount; J++) {
3233+
MIB.addImm(J);
3234+
}
3235+
3236+
Result = MIB.constrainAllUses(TII, TRI, RBI);
3237+
} else {
3238+
Result =
3239+
selectOpWithSrcs(LeftSideIn, LeftVecOpType, I, {SrcReg, ConstIntZero},
3240+
SPIRV::OpVectorExtractDynamic);
3241+
}
3242+
3243+
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
3244+
TII.get(SPIRV::OpVectorShuffle))
3245+
.addDef(RightSideIn)
3246+
.addUse(GR.getSPIRVTypeID(RightVecOpType))
3247+
.addUse(SrcReg)
3248+
// Per the spec, repeat the vector if only one vec is needed
3249+
.addUse(SrcReg);
3250+
3251+
for (unsigned J = LeftComponentCount; J < ComponentCount; J++) {
3252+
MIB.addImm(J);
3253+
}
3254+
3255+
Result = Result && MIB.constrainAllUses(TII, TRI, RBI);
3256+
3257+
// Recursively call selectFirstBitSet64 on the 2 registers
3258+
Register LeftSideOut =
3259+
MRI->createVirtualRegister(GR.getRegClass(LeftVecResType));
3260+
Register RightSideOut =
3261+
MRI->createVirtualRegister(GR.getRegClass(RightVecResType));
3262+
Result = Result &&
3263+
selectFirstBitSet64(LeftSideOut, LeftVecResType, I, LeftSideIn,
3264+
BitSetOpcode, SwapPrimarySide);
3265+
Result = Result &&
3266+
selectFirstBitSet64(RightSideOut, RightVecResType, I, RightSideIn,
3267+
BitSetOpcode, SwapPrimarySide);
3268+
3269+
// Join the two resulting registers back into the return type
3270+
// (ie i32x2, i32x2 -> i32x4)
3271+
return Result &&
3272+
selectOpWithSrcs(ResVReg, ResType, I, {LeftSideOut, RightSideOut},
3273+
SPIRV::OpCompositeConstruct);
3274+
}
3275+
3276+
// 1. Split int64 into 2 pieces using a bitcast
31843277
MachineIRBuilder MIRBuilder(I);
31853278
SPIRVType *PostCastType =
31863279
GR.getOrCreateSPIRVVectorType(BaseType, 2 * ComponentCount, MIRBuilder);
31873280
Register BitcastReg =
31883281
MRI->createVirtualRegister(GR.getRegClass(PostCastType));
31893282
bool Result =
3190-
selectOpWithSrcs(BitcastReg, PostCastType, I, {OpReg}, SPIRV::OpBitcast);
3283+
selectOpWithSrcs(BitcastReg, PostCastType, I, {SrcReg}, SPIRV::OpBitcast);
31913284

31923285
// 2. Find the first set bit from the primary side for all the pieces in #1
31933286
Register FBSReg = MRI->createVirtualRegister(GR.getRegClass(PostCastType));
@@ -3198,20 +3291,15 @@ bool SPIRVInstructionSelector::selectFirstBitSet64(Register ResVReg,
31983291
Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
31993292
Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType));
32003293

3201-
bool ZeroAsNull = STI.isOpenCLEnv();
32023294
bool IsScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector;
32033295
if (IsScalarRes) {
32043296
// if scalar do a vector extract
3205-
Result = Result &&
3206-
selectOpWithSrcs(HighReg, ResType, I,
3207-
{FBSReg, GR.getOrCreateConstInt(0, I, ResType,
3208-
TII, ZeroAsNull)},
3209-
SPIRV::OpVectorExtractDynamic);
3210-
Result = Result &&
3211-
selectOpWithSrcs(LowReg, ResType, I,
3212-
{FBSReg, GR.getOrCreateConstInt(1, I, ResType,
3213-
TII, ZeroAsNull)},
3214-
SPIRV::OpVectorExtractDynamic);
3297+
Result =
3298+
Result && selectOpWithSrcs(HighReg, ResType, I, {FBSReg, ConstIntZero},
3299+
SPIRV::OpVectorExtractDynamic);
3300+
Result =
3301+
Result && selectOpWithSrcs(LowReg, ResType, I, {FBSReg, ConstIntOne},
3302+
SPIRV::OpVectorExtractDynamic);
32153303
} else {
32163304
// if vector do a shufflevector
32173305
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
@@ -3324,7 +3412,7 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
33243412
case 32:
33253413
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
33263414
case 64:
3327-
return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
3415+
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
33283416
/*SwapPrimarySide=*/false);
33293417
default:
33303418
report_fatal_error(
@@ -3350,7 +3438,7 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
33503438
case 32:
33513439
return selectFirstBitSet32(ResVReg, ResType, I, OpReg, BitSetOpcode);
33523440
case 64:
3353-
return selectFirstBitSet64(ResVReg, ResType, I, BitSetOpcode,
3441+
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
33543442
/*SwapPrimarySide=*/true);
33553443
default:
33563444
report_fatal_error("spv_firstbitlow only supports 16,32,64 bits.");

llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; CHECK-DAG: OpMemoryModel Logical GLSL450
66
; CHECK-DAG: [[u32_t:%.+]] = OpTypeInt 32 0
77
; CHECK-DAG: [[u32x2_t:%.+]] = OpTypeVector [[u32_t]] 2
8+
; CHECK-DAG: [[u32x3_t:%.+]] = OpTypeVector [[u32_t]] 3
89
; CHECK-DAG: [[u32x4_t:%.+]] = OpTypeVector [[u32_t]] 4
910
; CHECK-DAG: [[const_0:%.*]] = OpConstant [[u32_t]] 0
1011
; CHECK-DAG: [[const_0x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_0]] [[const_0]]
@@ -15,8 +16,12 @@
1516
; CHECK-DAG: [[const_neg1x2:%.*]] = OpConstantComposite [[u32x2_t]] [[const_neg1]] [[const_neg1]]
1617
; CHECK-DAG: [[u16_t:%.+]] = OpTypeInt 16 0
1718
; CHECK-DAG: [[u16x2_t:%.+]] = OpTypeVector [[u16_t]] 2
19+
; CHECK-DAG: [[u16x3_t:%.+]] = OpTypeVector [[u16_t]] 3
20+
; CHECK-DAG: [[u16x4_t:%.+]] = OpTypeVector [[u16_t]] 4
1821
; CHECK-DAG: [[u64_t:%.+]] = OpTypeInt 64 0
1922
; CHECK-DAG: [[u64x2_t:%.+]] = OpTypeVector [[u64_t]] 2
23+
; CHECK-DAG: [[u64x3_t:%.+]] = OpTypeVector [[u64_t]] 3
24+
; CHECK-DAG: [[u64x4_t:%.+]] = OpTypeVector [[u64_t]] 4
2025
; CHECK-DAG: [[bool_t:%.+]] = OpTypeBool
2126
; CHECK-DAG: [[boolx2_t:%.+]] = OpTypeVector [[bool_t]] 2
2227

@@ -30,8 +35,8 @@ entry:
3035
ret i32 %elt.firstbitlow
3136
}
3237

33-
; CHECK-LABEL: Begin function firstbitlow_2xi32
34-
define noundef <2 x i32> @firstbitlow_2xi32(<2 x i32> noundef %a) {
38+
; CHECK-LABEL: Begin function firstbitlow_v2xi32
39+
define noundef <2 x i32> @firstbitlow_v2xi32(<2 x i32> noundef %a) {
3540
entry:
3641
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x2_t]]
3742
; CHECK: [[ret:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[a]]
@@ -40,6 +45,26 @@ entry:
4045
ret <2 x i32> %elt.firstbitlow
4146
}
4247

48+
; CHECK-LABEL: Begin function firstbitlow_v3xi32
49+
define noundef <3 x i32> @firstbitlow_v3xi32(<3 x i32> noundef %a) {
50+
entry:
51+
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x3_t]]
52+
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a]]
53+
; CHECK: OpReturnValue [[ret]]
54+
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i32(<3 x i32> %a)
55+
ret <3 x i32> %elt.firstbitlow
56+
}
57+
58+
; CHECK-LABEL: Begin function firstbitlow_v4xi32
59+
define noundef <4 x i32> @firstbitlow_v4xi32(<4 x i32> noundef %a) {
60+
entry:
61+
; CHECK: [[a:%.+]] = OpFunctionParameter [[u32x4_t]]
62+
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a]]
63+
; CHECK: OpReturnValue [[ret]]
64+
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i32(<4 x i32> %a)
65+
ret <4 x i32> %elt.firstbitlow
66+
}
67+
4368
; CHECK-LABEL: Begin function firstbitlow_i16
4469
define noundef i32 @firstbitlow_i16(i16 noundef %a) {
4570
entry:
@@ -62,6 +87,28 @@ entry:
6287
ret <2 x i32> %elt.firstbitlow
6388
}
6489

90+
; CHECK-LABEL: Begin function firstbitlow_v3xi16
91+
define noundef <3 x i32> @firstbitlow_v3xi16(<3 x i16> noundef %a) {
92+
entry:
93+
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x3_t]]
94+
; CHECK: [[a32:%.+]] = OpUConvert [[u32x3_t]] [[a16]]
95+
; CHECK: [[ret:%.+]] = OpExtInst [[u32x3_t]] [[glsl_450_ext]] FindILsb [[a32]]
96+
; CHECK: OpReturnValue [[ret]]
97+
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i16(<3 x i16> %a)
98+
ret <3 x i32> %elt.firstbitlow
99+
}
100+
101+
; CHECK-LABEL: Begin function firstbitlow_v4xi16
102+
define noundef <4 x i32> @firstbitlow_v4xi16(<4 x i16> noundef %a) {
103+
entry:
104+
; CHECK: [[a16:%.+]] = OpFunctionParameter [[u16x4_t]]
105+
; CHECK: [[a32:%.+]] = OpUConvert [[u32x4_t]] [[a16]]
106+
; CHECK: [[ret:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[a32]]
107+
; CHECK: OpReturnValue [[ret]]
108+
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i16(<4 x i16> %a)
109+
ret <4 x i32> %elt.firstbitlow
110+
}
111+
65112
; CHECK-LABEL: Begin function firstbitlow_i64
66113
define noundef i32 @firstbitlow_i64(i64 noundef %a) {
67114
entry:
@@ -96,6 +143,74 @@ entry:
96143
ret <2 x i32> %elt.firstbitlow
97144
}
98145

146+
; CHECK-LABEL: Begin function firstbitlow_v3i64
147+
define noundef <3 x i32> @firstbitlow_v3i64(<3 x i64> noundef %a) {
148+
entry:
149+
; Split the i64x3 into i64, i64x2
150+
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x3_t]]
151+
; CHECK: [[left:%.+]] = OpVectorExtractDynamic [[u64_t]] [[a]] [[const_0]]
152+
; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 1 2
153+
154+
; Do firstbitlow on i64, i64x2
155+
; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x2_t]] [[left]]
156+
; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x2_t]] [[glsl_450_ext]] FindILsb [[left_cast]]
157+
; CHECK: [[left_high_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_0]]
158+
; CHECK: [[left_low_bits:%.+]] = OpVectorExtractDynamic [[u32_t]] [[left_lsb_bits]] [[const_1]]
159+
; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[bool_t]] [[left_low_bits]] [[const_neg1]]
160+
; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]]
161+
; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32_t]] [[left_should_use_high]] [[const_32]] [[const_0]]
162+
; CHECK: [[left_res:%.+]] = OpIAdd [[u32_t]] [[left_ans_offset]] [[left_ans_bits]]
163+
164+
; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
165+
; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]]
166+
; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
167+
; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
168+
; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]]
169+
; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]]
170+
; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]]
171+
; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
172+
173+
; Merge the resulting i32, i32x2 into the final i32x3 and return it
174+
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x3_t]] [[left_res]] [[right_res]]
175+
; CHECK: OpReturnValue [[ret]]
176+
%elt.firstbitlow = call <3 x i32> @llvm.spv.firstbitlow.v3i64(<3 x i64> %a)
177+
ret <3 x i32> %elt.firstbitlow
178+
}
179+
180+
; CHECK-LABEL: Begin function firstbitlow_v4i64
181+
define noundef <4 x i32> @firstbitlow_v4i64(<4 x i64> noundef %a) {
182+
entry:
183+
; Split the i64x4 into 2 i64x2
184+
; CHECK: [[a:%.+]] = OpFunctionParameter [[u64x4_t]]
185+
; CHECK: [[left:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 0 1
186+
; CHECK: [[right:%.+]] = OpVectorShuffle [[u64x2_t]] [[a]] [[a]] 2 3
187+
188+
; Do firstbitlow on the 2 i64x2
189+
; CHECK: [[left_cast:%.+]] = OpBitcast [[u32x4_t]] [[left]]
190+
; CHECK: [[left_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[left_cast]]
191+
; CHECK: [[left_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 0 2
192+
; CHECK: [[left_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[left_lsb_bits]] [[left_lsb_bits]] 1 3
193+
; CHECK: [[left_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[left_low_bits]] [[const_neg1x2]]
194+
; CHECK: [[left_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[left_high_bits]] [[left_low_bits]]
195+
; CHECK: [[left_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[left_should_use_high]] [[const_32x2]] [[const_0x2]]
196+
; CHECK: [[left_res:%.+]] = OpIAdd [[u32x2_t]] [[left_ans_offset]] [[left_ans_bits]]
197+
198+
; CHECK: [[right_cast:%.+]] = OpBitcast [[u32x4_t]] [[right]]
199+
; CHECK: [[right_lsb_bits:%.+]] = OpExtInst [[u32x4_t]] [[glsl_450_ext]] FindILsb [[right_cast]]
200+
; CHECK: [[right_high_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 0 2
201+
; CHECK: [[right_low_bits:%.+]] = OpVectorShuffle [[u32x2_t]] [[right_lsb_bits]] [[right_lsb_bits]] 1 3
202+
; CHECK: [[right_should_use_high:%.+]] = OpIEqual [[boolx2_t]] [[right_low_bits]] [[const_neg1x2]]
203+
; CHECK: [[right_ans_bits:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[right_high_bits]] [[right_low_bits]]
204+
; CHECK: [[right_ans_offset:%.+]] = OpSelect [[u32x2_t]] [[right_should_use_high]] [[const_32x2]] [[const_0x2]]
205+
; CHECK: [[right_res:%.+]] = OpIAdd [[u32x2_t]] [[right_ans_offset]] [[right_ans_bits]]
206+
207+
; Merge the resulting 2 i32x2 into the final i32x4 and return it
208+
; CHECK: [[ret:%.+]] = OpCompositeConstruct [[u32x4_t]] [[left_res]] [[right_res]]
209+
; CHECK: OpReturnValue [[ret]]
210+
%elt.firstbitlow = call <4 x i32> @llvm.spv.firstbitlow.v4i64(<4 x i64> %a)
211+
ret <4 x i32> %elt.firstbitlow
212+
}
213+
99214
;declare i16 @llvm.spv.firstbitlow.i16(i16)
100215
;declare i32 @llvm.spv.firstbitlow.i32(i32)
101216
;declare i64 @llvm.spv.firstbitlow.i64(i64)

0 commit comments

Comments
 (0)