Skip to content

Commit b6e5352

Browse files
davemgreenAlexisPerry
authored and committed
[AArch64][GlobalISel] Add fp128 and i128 fptosi/fptoui handling. (llvm#95528)
Any fp128 conversion needs to end up as a libcall, as will f32->i128 and f64->i128. f16 is a bit special: the maximum range of the result fits in an i17, so the result can be shrunk to an i64. Vectors with i128/fp128 types are scalarized.
1 parent b7c723d commit b6e5352

File tree

5 files changed

+1825
-1074
lines changed

5 files changed

+1825
-1074
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1123,15 +1123,13 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
11231123
case TargetOpcode::G_FPTOSI:
11241124
case TargetOpcode::G_FPTOUI: {
11251125
// FIXME: Support other types
1126-
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1126+
Type *FromTy =
1127+
getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
11271128
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1128-
if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1129+
if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
11291130
return UnableToLegalize;
11301131
LegalizeResult Status = conversionLibcall(
1131-
MI, MIRBuilder,
1132-
ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1133-
FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1134-
LocObserver);
1132+
MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver);
11351133
if (Status != Legalized)
11361134
return Status;
11371135
break;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 28 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -661,34 +661,52 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
661661

662662
// Conversions
663663
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
664-
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
664+
.legalFor({{s32, s32},
665+
{s64, s32},
666+
{s32, s64},
667+
{s64, s64},
668+
{v2s64, v2s64},
669+
{v4s32, v4s32},
670+
{v2s32, v2s32}})
665671
.legalIf([=](const LegalityQuery &Query) {
666672
return HasFP16 &&
667673
(Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
668674
Query.Types[1] == v8s16) &&
669675
(Query.Types[0] == s32 || Query.Types[0] == s64 ||
670676
Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
671677
})
672-
.widenScalarToNextPow2(0)
673-
.clampScalar(0, s32, s64)
674-
.widenScalarToNextPow2(1)
675-
.clampScalarOrElt(1, MinFPScalar, s64)
678+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
679+
.scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
680+
// The range of a fp16 value fits into an i17, so we can lower the width
681+
// to i64.
682+
.narrowScalarIf(
683+
[=](const LegalityQuery &Query) {
684+
return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
685+
},
686+
changeTo(0, s64))
676687
.moreElementsToNextPow2(0)
688+
.widenScalarOrEltToNextPow2OrMinSize(0)
689+
.minScalar(0, s32)
690+
.widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
677691
.widenScalarIf(
678692
[=](const LegalityQuery &Query) {
679-
return Query.Types[0].getScalarSizeInBits() >
680-
Query.Types[1].getScalarSizeInBits();
693+
return Query.Types[0].getScalarSizeInBits() <= 64 &&
694+
Query.Types[0].getScalarSizeInBits() >
695+
Query.Types[1].getScalarSizeInBits();
681696
},
682697
LegalizeMutations::changeElementSizeTo(1, 0))
683698
.widenScalarIf(
684699
[=](const LegalityQuery &Query) {
685-
return Query.Types[0].getScalarSizeInBits() <
686-
Query.Types[1].getScalarSizeInBits();
700+
return Query.Types[1].getScalarSizeInBits() <= 64 &&
701+
Query.Types[0].getScalarSizeInBits() <
702+
Query.Types[1].getScalarSizeInBits();
687703
},
688704
LegalizeMutations::changeElementSizeTo(0, 1))
689705
.clampNumElements(0, v4s16, v8s16)
690706
.clampNumElements(0, v2s32, v4s32)
691-
.clampMaxNumElements(0, s64, 2);
707+
.clampMaxNumElements(0, s64, 2)
708+
.libcallFor(
709+
{{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
692710

693711
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
694712
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})

llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll

Lines changed: 0 additions & 8 deletions
This file was deleted.

llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir

Lines changed: 0 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -711,79 +711,3 @@ body: |
711711
%1:fpr(<2 x s32>) = G_UITOFP %0
712712
$d0 = COPY %1(<2 x s32>)
713713
...
714-
715-
---
716-
name: fptosi_v2s64_v2s32
717-
legalized: true
718-
regBankSelected: true
719-
720-
body: |
721-
bb.0:
722-
liveins: $d0
723-
724-
; CHECK-LABEL: name: fptosi_v2s64_v2s32
725-
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
726-
; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]]
727-
; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[FCVTLv2i32_]]
728-
; CHECK: $q0 = COPY [[FCVTZSv2f64_]]
729-
%0:fpr(<2 x s32>) = COPY $d0
730-
%1:fpr(<2 x s64>) = G_FPTOSI %0
731-
$q0 = COPY %1(<2 x s64>)
732-
...
733-
734-
---
735-
name: fptoui_v2s64_v2s32
736-
legalized: true
737-
regBankSelected: true
738-
739-
body: |
740-
bb.0:
741-
liveins: $d0
742-
743-
; CHECK-LABEL: name: fptoui_v2s64_v2s32
744-
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
745-
; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]]
746-
; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[FCVTLv2i32_]]
747-
; CHECK: $q0 = COPY [[FCVTZUv2f64_]]
748-
%0:fpr(<2 x s32>) = COPY $d0
749-
%1:fpr(<2 x s64>) = G_FPTOUI %0
750-
$q0 = COPY %1(<2 x s64>)
751-
...
752-
753-
---
754-
name: fptosi_v2s32_v2s64
755-
legalized: true
756-
regBankSelected: true
757-
758-
body: |
759-
bb.0:
760-
liveins: $q0
761-
762-
; CHECK-LABEL: name: fptosi_v2s32_v2s64
763-
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
764-
; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[COPY]]
765-
; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZSv2f64_]]
766-
; CHECK: $d0 = COPY [[XTNv2i32_]]
767-
%0:fpr(<2 x s64>) = COPY $q0
768-
%1:fpr(<2 x s32>) = G_FPTOSI %0
769-
$d0 = COPY %1(<2 x s32>)
770-
...
771-
772-
---
773-
name: fptoui_v2s32_v2s64
774-
legalized: true
775-
regBankSelected: true
776-
777-
body: |
778-
bb.0:
779-
liveins: $q0
780-
781-
; CHECK-LABEL: name: fptoui_v2s32_v2s64
782-
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
783-
; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[COPY]]
784-
; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZUv2f64_]]
785-
; CHECK: $d0 = COPY [[XTNv2i32_]]
786-
%0:fpr(<2 x s64>) = COPY $q0
787-
%1:fpr(<2 x s32>) = G_FPTOUI %0
788-
$d0 = COPY %1(<2 x s32>)
789-
...

0 commit comments

Comments
 (0)