Skip to content

Commit 960fd47

Browse files
authored
Merge pull request #2764 from apple/fix-fp-round-20210107
[SelectionDAG] Don't scalarize vector fpround sources that don't need it.
2 parents 034e650 + 8eb970b commit 960fd47

File tree

2 files changed

+42
-4
lines changed

2 files changed

+42
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -318,10 +318,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
318318
}
319319

320320
// Scalarizes the result of a vector FP_ROUND node N: returns a scalar FP_ROUND
// whose result type is the element type of N's vector result type.
// In this diff view the "-" lines are the previous body (which always called
// GetScalarizedVector on operand 0) and the "+" lines are its replacement.
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
321-
EVT NewVT = N->getValueType(0).getVectorElementType();
322-
SDValue Op = GetScalarizedVector(N->getOperand(0));
323-
return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
324-
NewVT, Op, N->getOperand(1));
321+
SDLoc DL(N);
322+
SDValue Op = N->getOperand(0);
323+
EVT OpVT = Op.getValueType();
324+
// The result needs scalarizing, but it's not a given that the source does.
325+
// See similar logic in ScalarizeVecRes_UnaryOp.
326+
// Only use the already-scalarized operand when the source type's legalize
// action is itself TypeScalarizeVector.
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
327+
Op = GetScalarizedVector(Op);
328+
} else {
329+
// Otherwise read element 0 of the source vector as the scalar input.
EVT VT = OpVT.getVectorElementType();
330+
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
331+
DAG.getVectorIdxConstant(0, DL));
332+
}
333+
// Build the scalar FP_ROUND; operand 1 of the original node is passed
// through unchanged.
return DAG.getNode(ISD::FP_ROUND, DL,
334+
N->getValueType(0).getVectorElementType(), Op,
335+
N->getOperand(1));
325336
}
326337

327338
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,33 @@ define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
199199
ret <2 x float> %vcvt1.i
200200
}
201201

202+
; Truncates a <1 x float> argument to <1 x half> with fptrunc and returns
; element 0 of the result. The expected output for each of the three check
; prefixes below contains a scalar "fcvt h0, s0" followed by ret.
define half @test_vcvt_f16_f32(<1 x float> %x) {
203+
; GENERIC-LABEL: test_vcvt_f16_f32:
204+
; GENERIC: // %bb.0:
205+
; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
206+
; GENERIC-NEXT: fcvt h0, s0
207+
; GENERIC-NEXT: ret
208+
;
209+
; FAST-LABEL: test_vcvt_f16_f32:
210+
; FAST: // %bb.0:
211+
; FAST-NEXT: mov.16b v1, v0
212+
; FAST-NEXT: // implicit-def: $q0
213+
; FAST-NEXT: mov.16b v0, v1
214+
; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0
215+
; FAST-NEXT: fcvt h0, s0
216+
; FAST-NEXT: ret
217+
;
218+
; GISEL-LABEL: test_vcvt_f16_f32:
219+
; GISEL: // %bb.0:
220+
; GISEL-NEXT: fmov x8, d0
221+
; GISEL-NEXT: fmov s0, w8
222+
; GISEL-NEXT: fcvt h0, s0
223+
; GISEL-NEXT: ret
224+
%tmp = fptrunc <1 x float> %x to <1 x half>
225+
%elt = extractelement <1 x half> %tmp, i32 0
226+
ret half %elt
227+
}
228+
202229
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64)
203230
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64)
204231
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {

0 commit comments

Comments
 (0)