Skip to content

Commit d1e9093

Browse files
nikic authored and cuviper committed
[AArch64] Only apply bool vector bitcast opt if result is scalar (llvm#81256)
This optimization tries to optimize bitcasts from `<N x i1>` to iN, but currently also triggers for `<N x i1>` to `<M x iK>` bitcasts, if custom lowering has been requested for these for an unrelated reason. Fix this by explicitly checking that the result type is scalar. Fixes llvm#81216. (cherry picked from commit 92d7992)
1 parent 74821af commit d1e9093

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -23536,7 +23536,8 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
2353623536
return;
2353723537
}
2353823538

23539-
if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1)
23539+
if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
23540+
!VT.isVector())
2354023541
return replaceBoolVectorBitcast(N, Results, DAG);
2354123542

2354223543
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))

llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll

+28
Original file line number | Diff line number | Diff line change
@@ -500,3 +500,31 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
500500
%bitmask = bitcast <6 x i1> %cmp_result to i6
501501
ret i6 %bitmask
502502
}
503+
504+
; Only apply the combine when casting a vector to a scalar.
505+
define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
506+
; CHECK-LABEL: vector_to_vector_cast:
507+
; CHECK: ; %bb.0:
508+
; CHECK-NEXT: sub sp, sp, #16
509+
; CHECK-NEXT: shl.16b v0, v0, #7
510+
; CHECK-NEXT: Lloh36:
511+
; CHECK-NEXT: adrp x8, lCPI20_0@PAGE
512+
; CHECK-NEXT: Lloh37:
513+
; CHECK-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF]
514+
; CHECK-NEXT: add x8, sp, #14
515+
; CHECK-NEXT: cmlt.16b v0, v0, #0
516+
; CHECK-NEXT: and.16b v0, v0, v1
517+
; CHECK-NEXT: ext.16b v1, v0, v0, #8
518+
; CHECK-NEXT: zip1.16b v0, v0, v1
519+
; CHECK-NEXT: addv.8h h0, v0
520+
; CHECK-NEXT: str h0, [sp, #14]
521+
; CHECK-NEXT: ld1.b { v0 }[0], [x8]
522+
; CHECK-NEXT: orr x8, x8, #0x1
523+
; CHECK-NEXT: ld1.b { v0 }[4], [x8]
524+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
525+
; CHECK-NEXT: add sp, sp, #16
526+
; CHECK-NEXT: ret
527+
; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh37
528+
%bc = bitcast <16 x i1> %arg to <2 x i8>
529+
ret <2 x i8> %bc
530+
}

0 commit comments

Comments
 (0)