Patch summary (descriptive preamble only; it sits before the first
"diff --git" header and contains no hunks).

What the hunks below do:
  * llvm/lib/Analysis/VectorUtils.cpp: inside the `switch (ID)` of
    llvm::isTriviallyScalarizable, six more intrinsic IDs are added next to
    the existing `Intrinsic::frexp` case, so they also reach `return true`:
    uadd/sadd/ssub/usub/umul/smul `_with_overflow`.
  * llvm/test/Transforms/Scalarizer/{sadd,smul,ssub,uadd,umul,usub}_with_overflow.ll:
    new tests, one per intrinsic. Each runs `opt -passes="function(scalarizer)"`
    on a `<3 x i32>` call and checks that it becomes three scalar
    `{ i32, i1 }` calls whose field 0 is re-gathered with insertelement.
  * llvm/test/Transforms/Scalarizer/uadd_overflow.ll: deleted. It checked that
    the vector llvm.uadd.with.overflow call was left untouched, which no
    longer matches the expectations in the new uadd_with_overflow.ll.
  * llvm/test/Transforms/Scalarizer/sincos.ll: new test carrying the same
    "not isTriviallyScalarizable => not scalarized" comment as the deleted
    file, now using llvm.sincos.v4f32.

NOTE(review): every new *_with_overflow test passes %b as both operands (%a is
an unused argument) and only extracts struct field 0. The <3 x i1> overflow
field is never extracted or checked -- consider covering it and using two
distinct operands.
NOTE(review): sincos.ll assumes llvm.sincos is not reported as trivially
scalarizable; nothing in this patch shows that -- confirm against
isTriviallyVectorizable / target hooks.
NOTE(review): the indentation of context and IR lines below was reconstructed
from a whitespace-collapsed copy of the patch -- verify against the original
before applying.

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index ad80e458ab57d..53be7fc0bee9f 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -125,6 +125,12 @@ bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
   // https://github.com/llvm/llvm-project/issues/112408
   switch (ID) {
   case Intrinsic::frexp:
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::umul_with_overflow:
+  case Intrinsic::smul_with_overflow:
     return true;
   }
   return false;
diff --git a/llvm/test/Transforms/Scalarizer/sadd_with_overflow.ll b/llvm/test/Transforms/Scalarizer/sadd_with_overflow.ll
new file mode 100644
index 0000000000000..1e5c50358bb4a
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/sadd_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.sadd.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}
diff --git a/llvm/test/Transforms/Scalarizer/sincos.ll b/llvm/test/Transforms/Scalarizer/sincos.ll
new file mode 100644
index 0000000000000..8db4ba3183290
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/sincos.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+; Test to make sure that struct return intrinsics that are not `isTriviallyScalarizable` do not get scalarized.
+
+define <4 x float> @test_(<4 x float> %Val) {
+; CHECK-LABEL: define <4 x float> @test_(
+; CHECK-SAME: <4 x float> [[VAL:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> [[VAL]])
+; CHECK-NEXT:    [[EL:%.*]] = extractvalue { <4 x float>, <4 x float> } [[R]], 0
+; CHECK-NEXT:    ret <4 x float> [[EL]]
+;
+  %r = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val)
+  %el = extractvalue { <4 x float>, <4 x float> } %r, 0
+  ret <4 x float> %el
+}
+
diff --git a/llvm/test/Transforms/Scalarizer/smul_with_overflow.ll b/llvm/test/Transforms/Scalarizer/smul_with_overflow.ll
new file mode 100644
index 0000000000000..c934077b24f30
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/smul_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.smul.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}
diff --git a/llvm/test/Transforms/Scalarizer/ssub_with_overflow.ll b/llvm/test/Transforms/Scalarizer/ssub_with_overflow.ll
new file mode 100644
index 0000000000000..3654e20b51599
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/ssub_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.ssub.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}
diff --git a/llvm/test/Transforms/Scalarizer/uadd_overflow.ll b/llvm/test/Transforms/Scalarizer/uadd_overflow.ll
deleted file mode 100644
index 39094451523a5..0000000000000
--- a/llvm/test/Transforms/Scalarizer/uadd_overflow.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt %s -passes='function(scalarizer)' -S | FileCheck %s
-
-; Test to make sure that struct return intrinsics that are not `isTriviallyScalarizable` do not get scalarized.
-
-define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
-; CHECK-LABEL: define <3 x i32> @test_(
-; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> [[B]], <3 x i32> [[B]])
-; CHECK-NEXT:    [[EL:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[R]], 0
-; CHECK-NEXT:    ret <3 x i32> [[EL]]
-;
-  %r = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
-  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
-  ret <3 x i32> %el
-}
diff --git a/llvm/test/Transforms/Scalarizer/uadd_with_overflow.ll b/llvm/test/Transforms/Scalarizer/uadd_with_overflow.ll
new file mode 100644
index 0000000000000..1c4b6124b00bb
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/uadd_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}
diff --git a/llvm/test/Transforms/Scalarizer/umul_with_overflow.ll b/llvm/test/Transforms/Scalarizer/umul_with_overflow.ll
new file mode 100644
index 0000000000000..ac1ca113bc4bd
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/umul_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.umul.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}
diff --git a/llvm/test/Transforms/Scalarizer/usub_with_overflow.ll b/llvm/test/Transforms/Scalarizer/usub_with_overflow.ll
new file mode 100644
index 0000000000000..fe1b5305d8680
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/usub_with_overflow.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
+
+define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: define <3 x i32> @test_(
+; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
+; CHECK-NEXT:    [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
+; CHECK-NEXT:    [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
+; CHECK-NEXT:    [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
+; CHECK-NEXT:    [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
+; CHECK-NEXT:    [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
+; CHECK-NEXT:    [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[EL]]
+;
+  %r = call { <3 x i32>, <3 x i1> } @llvm.usub.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
+  %el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
+  ret <3 x i32> %el
+}