Skip to content

Commit e3f080d

Browse files
committed
[Clang][AArch64][ARM]: Fix Inefficient loads/stores of _BitInt(N)
- Update Clang codegen so that loads and stores of _BitInt(N) read and write the legal in-memory representation, for _BitInt(N <= 128) on AArch64 and _BitInt(N <= 64) on ARM.
- AArch64: for _BitInt(N <= 128) the machine type is the smallest (un)signed fundamental integral data type that can hold N bits.
- ARM: for _BitInt(N <= 64) the machine type is the smallest (un)signed fundamental integral data type that can hold N bits.

Loads and stores are therefore emitted as follows, where:
  N - bit-precise integer size as declared
  M - number of bits in the representation, M >= N

Loads:
  %u = load iM, ptr %p
  %v = trunc iM %u to iN

Stores:
  %u = Xext iN %v to iM
  store iM %u, ptr %p

where Xext is zext or sext on ARM, depending on the signedness of the C type, and always zext on AArch64.

These changes follow the ABI documentation for:
  ARM: https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst
  AArch64: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst

Change-Id: I03d675afb4e749b00fef075aa10923682232dd79
Change-Id: I4beac3b92e06506606c8ee57866507a62ba42fba
1 parent 57790db commit e3f080d

File tree

11 files changed

+149
-28
lines changed

11 files changed

+149
-28
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,11 @@ class TargetInfo : public TransferrableTargetInfo,
667667
return false;
668668
}
669669

670+
// Different targets may support a different machine type width for the _BitInt
// type; this returns the bit width used for its in-memory representation.
671+
virtual unsigned getBitIntLegalWidth(unsigned Width) const { return Width; }
672+
673+
// Returns true if a _BitInt of the given signedness must be sign-extended
// (rather than zero-extended) when it is widened to its in-memory type.
// The default implementation always returns false, i.e. zero-extension.
virtual bool isBitIntSignExtended(bool IsSigned) const { return false; }
674+
670675
// Different targets may support a different maximum width for the _BitInt
671676
// type, depending on what operations are supported.
672677
virtual size_t getMaxBitIntWidth() const {

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ bool AArch64TargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
221221
return true;
222222
}
223223

224+
// AArch64 override: the legal in-memory width of a _BitInt(Width) is whatever
// getBitIntWidth reports for it.
// NOTE(review): getBitIntWidth is defined outside this hunk; presumably it
// rounds Width up to the size of the containing machine type -- confirm.
unsigned AArch64TargetInfo::getBitIntLegalWidth(unsigned Width) const {
225+
return getBitIntWidth(Width);
226+
}
227+
224228
bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
225229
BranchProtectionInfo &BPI,
226230
StringRef &Err) const {

clang/lib/Basic/Targets/AArch64.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
202202
bool hasBitIntType() const override { return true; }
203203

204204
bool validateTarget(DiagnosticsEngine &Diags) const override;
205+
206+
unsigned getBitIntLegalWidth(unsigned Width) const override;
205207
};
206208

207209
class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {

clang/lib/Basic/Targets/ARM.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,16 @@ int ARMTargetInfo::getEHDataRegisterNumber(unsigned RegNo) const {
13441344

13451345
bool ARMTargetInfo::hasSjLjLowering() const { return true; }
13461346

1347+
// ARM override: the legal in-memory width of a _BitInt(Width) is whatever
// getBitIntWidth reports for it.
// NOTE(review): getBitIntWidth is defined outside this hunk; presumably it
// rounds Width up to the size of the containing machine type -- confirm.
unsigned ARMTargetInfo::getBitIntLegalWidth(unsigned Width) const {
1348+
return getBitIntWidth(Width);
1349+
}
1350+
1351+
// ARM override: a signed _BitInt is sign-extended when widened to its
// in-memory type, while an unsigned _BitInt is zero-extended. The result is
// therefore exactly the signedness of the type.
bool ARMTargetInfo::isBitIntSignExtended(bool IsSigned) const {
  return IsSigned;
}
1356+
13471357
ARMleTargetInfo::ARMleTargetInfo(const llvm::Triple &Triple,
13481358
const TargetOptions &Opts)
13491359
: ARMTargetInfo(Triple, Opts) {}

clang/lib/Basic/Targets/ARM.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
229229
std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override {
230230
return std::make_pair(getTriple().isArch64Bit() ? 256 : 64, 64);
231231
}
232+
233+
unsigned getBitIntLegalWidth(unsigned Width) const override;
234+
235+
bool isBitIntSignExtended(bool IsSigned) const override;
232236
};
233237

234238
class LLVM_LIBRARY_VISIBILITY ARMleTargetInfo : public ARMTargetInfo {

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1989,7 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
19891989
return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
19901990
}
19911991

1992-
llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
1992+
llvm::LoadInst *Load = nullptr;
1993+
if (const auto *BitintTy = Ty->getAs<BitIntType>()) {
1994+
Address TempAddr(Addr.getBasePointer(), ConvertTypeForMem(Ty),
1995+
Addr.getAlignment());
1996+
Load = Builder.CreateLoad(TempAddr, Volatile);
1997+
} else
1998+
Load = Builder.CreateLoad(Addr, Volatile);
1999+
19932000
if (isNontemporal) {
19942001
llvm::MDNode *Node = llvm::MDNode::get(
19952002
Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
@@ -2021,6 +2028,12 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
20212028
assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
20222029
"wrong value rep of bool");
20232030
}
2031+
if (auto *BitIntTy = Ty->getAs<BitIntType>()) {
2032+
if (CGM.getTarget().isBitIntSignExtended(BitIntTy->isSigned()))
2033+
return Builder.CreateSExt(Value, ConvertTypeForMem(Ty), "sext_bitint");
2034+
else
2035+
return Builder.CreateZExt(Value, ConvertTypeForMem(Ty), "zext_bitint");
2036+
}
20242037

20252038
return Value;
20262039
}
@@ -2043,6 +2056,8 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
20432056
unsigned ValNumElems = cast<llvm::FixedVectorType>(ValTy)->getNumElements();
20442057
return emitBoolVecConversion(V, ValNumElems, "extractvec");
20452058
}
2059+
if (Ty->getAs<BitIntType>())
2060+
return Builder.CreateTrunc(Value, ConvertType(Ty), "to_bitint");
20462061

20472062
return Value;
20482063
}

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
114114
return llvm::IntegerType::get(getLLVMContext(),
115115
(unsigned)Context.getTypeSize(T));
116116

117+
if (T->isBitIntType())
118+
return llvm::IntegerType::get(getLLVMContext(),
119+
CGM.getTarget().getBitIntLegalWidth(
120+
T->getAs<BitIntType>()->getNumBits()));
117121
// Else, don't map it.
118122
return R;
119123
}

clang/test/CodeGen/AArch64/BitInt.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple aarch64 -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple aarch64 -S -o /dev/null %s
4+
5+
_BitInt(18) signed_src;
6+
_BitInt(18) signed_dst;
7+
8+
// CHECK-LABEL: define dso_local void @test_signed(
9+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: [[ENTRY:.*:]]
11+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @signed_dst, align 4
12+
// CHECK-NEXT: [[TOBITINT:%.*]] = trunc i32 [[TMP0]] to i18
13+
// CHECK-NEXT: [[ZEXTBITINT:%.*]] = zext i18 [[TOBITINT]] to i32
14+
// CHECK-NEXT: store i32 [[ZEXTBITINT]], ptr @signed_src, align 4
15+
// CHECK-NEXT: ret void
16+
//
17+
void test_signed() {
18+
signed_src = signed_dst;
19+
}
20+
21+
unsigned _BitInt(18) src;
22+
unsigned _BitInt(18) dst;
23+
24+
// CHECK-LABEL: define dso_local void @test_unsigned(
25+
// CHECK-SAME: ) #[[ATTR0]] {
26+
// CHECK-NEXT: [[ENTRY:.*:]]
27+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @dst, align 4
28+
// CHECK-NEXT: [[TOBITINT:%.*]] = trunc i32 [[TMP0]] to i18
29+
// CHECK-NEXT: [[ZEXTBITINT:%.*]] = zext i18 [[TOBITINT]] to i32
30+
// CHECK-NEXT: store i32 [[ZEXTBITINT]], ptr @src, align 4
31+
// CHECK-NEXT: ret void
32+
//
33+
void test_unsigned() {
34+
src = dst;
35+
}

clang/test/CodeGen/Arm/BitInt.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2+
// RUN: %clang_cc1 -triple arm -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple arm -S -o /dev/null %s
4+
5+
_BitInt(18) signed_src;
6+
_BitInt(18) signed_dst;
7+
8+
// CHECK-LABEL: define dso_local arm_aapcscc void @test_signed(
9+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: entry:
11+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @signed_dst, align 4
12+
// CHECK-NEXT: [[TOBITINT:%.*]] = trunc i32 [[TMP0]] to i18
13+
// CHECK-NEXT: [[SEXTBITINT:%.*]] = sext i18 [[TOBITINT]] to i32
14+
// CHECK-NEXT: store i32 [[SEXTBITINT]], ptr @signed_src, align 4
15+
// CHECK-NEXT: ret void
16+
//
17+
void test_signed() {
18+
signed_src = signed_dst;
19+
}
20+
21+
unsigned _BitInt(18) src;
22+
unsigned _BitInt(18) dst;
23+
24+
// CHECK-LABEL: define dso_local arm_aapcscc void @test_unsigned(
25+
// CHECK-SAME: ) #[[ATTR0]] {
26+
// CHECK-NEXT: entry:
27+
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @dst, align 4
28+
// CHECK-NEXT: [[TOBITINT:%.*]] = trunc i32 [[TMP0]] to i18
29+
// CHECK-NEXT: [[ZEXTBITINT:%.*]] = zext i18 [[TOBITINT]] to i32
30+
// CHECK-NEXT: store i32 [[ZEXTBITINT]], ptr @src, align 4
31+
// CHECK-NEXT: ret void
32+
//
33+
void test_unsigned() {
34+
src = dst;
35+
}
36+

clang/test/CodeGen/attr-noundef.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,9 @@ void pass_large_BitInt(_BitInt(127) e) {
159159

160160
// TODO: for now, ExtInt is only noundef if it is sign/zero-extended
161161
// CHECK-INTEL: [[DEF]] noundef signext i3 @{{.*}}ret_BitInt{{.*}}()
162-
// CHECK-AARCH: [[DEF]] i3 @{{.*}}ret_BitInt{{.*}}()
162+
// CHECK-AARCH: [[DEF]] noundef i3 @{{.*}}ret_BitInt{{.*}}()
163163
// CHECK-INTEL: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 noundef signext %
164-
// CHECK-AARCH: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 %
164+
// CHECK-AARCH: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 noundef %
165165
// CHECK-INTEL: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i64 %{{.*}}, i64 %
166-
// CHECK-AARCH: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i127 %
166+
// CHECK-AARCH: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i127 noundef %
167167
} // namespace check_exotic

clang/test/CodeGen/builtins-bitint.c

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi1(
99
// CHECK-O0-SAME: ) #[[ATTR0:[0-9]+]] {
1010
// CHECK-O0-NEXT: entry:
11-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
12-
// CHECK-O0-NEXT: store i1 true, ptr [[A]], align 1
13-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
14-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[TMP0]])
11+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
12+
// CHECK-O0-NEXT: store i8 1, ptr [[A]], align 1
13+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
14+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i1
15+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[TO_BITINT]])
1516
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
1617
// CHECK-O0-NEXT: ret i32 [[CAST]]
1718
//
@@ -28,10 +29,11 @@ int test_popcountg_ubi1() {
2829
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi2(
2930
// CHECK-O0-SAME: ) #[[ATTR0]] {
3031
// CHECK-O0-NEXT: entry:
31-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
32-
// CHECK-O0-NEXT: store i2 -1, ptr [[A]], align 1
33-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
34-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[TMP0]])
32+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
33+
// CHECK-O0-NEXT: store i8 3, ptr [[A]], align 1
34+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
35+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i2
36+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[TO_BITINT]])
3537
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
3638
// CHECK-O0-NEXT: ret i32 [[CAST]]
3739
//
@@ -48,10 +50,11 @@ int test_popcountg_ubi2() {
4850
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi1(
4951
// CHECK-O0-SAME: ) #[[ATTR0]] {
5052
// CHECK-O0-NEXT: entry:
51-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
52-
// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
53-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
54-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[TMP0]], i1 false)
53+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
54+
// CHECK-O0-NEXT: store i8 0, ptr [[A]], align 1
55+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
56+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i1
57+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[TO_BITINT]], i1 false)
5558
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
5659
// CHECK-O0-NEXT: ret i32 [[CAST]]
5760
//
@@ -68,10 +71,11 @@ int test_ctzg_ubi1() {
6871
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi2(
6972
// CHECK-O0-SAME: ) #[[ATTR0]] {
7073
// CHECK-O0-NEXT: entry:
71-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
72-
// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
73-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
74-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[TMP0]], i1 false)
74+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
75+
// CHECK-O0-NEXT: store i8 0, ptr [[A]], align 1
76+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
77+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i2
78+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[TO_BITINT]], i1 false)
7579
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
7680
// CHECK-O0-NEXT: ret i32 [[CAST]]
7781
//
@@ -88,10 +92,11 @@ int test_ctzg_ubi2() {
8892
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi1(
8993
// CHECK-O0-SAME: ) #[[ATTR0]] {
9094
// CHECK-O0-NEXT: entry:
91-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
92-
// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
93-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
94-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[TMP0]], i1 false)
95+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
96+
// CHECK-O0-NEXT: store i8 0, ptr [[A]], align 1
97+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
98+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i1
99+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[TO_BITINT]], i1 false)
95100
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
96101
// CHECK-O0-NEXT: ret i32 [[CAST]]
97102
//
@@ -108,10 +113,11 @@ int test_clzg_ubi1() {
108113
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi2(
109114
// CHECK-O0-SAME: ) #[[ATTR0]] {
110115
// CHECK-O0-NEXT: entry:
111-
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
112-
// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
113-
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
114-
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[TMP0]], i1 false)
116+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i8, align 1
117+
// CHECK-O0-NEXT: store i8 0, ptr [[A]], align 1
118+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
119+
// CHECK-O0-NEXT: [[TO_BITINT:%.*]] = trunc i8 [[TMP0]] to i2
120+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[TO_BITINT]], i1 false)
115121
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
116122
// CHECK-O0-NEXT: ret i32 [[CAST]]
117123
//

0 commit comments

Comments
 (0)