llvm · earnol · Aug 15, 2024 · Aug 7, 2024 · bjope · Aug 21, 2024
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -64,6 +65,22 @@ static llvm::cl::opt<bool> ClSanitizeGuardChecks(
     "ubsan-guard-checks", llvm::cl::Optional,
     llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`."));
 
+//===--------------------------------------------------------------------===//
+//                        Defines for metadata
+//===--------------------------------------------------------------------===//
+
+// Those values are crucial to be the SAME as in ubsan runtime library.
+enum VariableTypeDescriptorKind : uint16_t {
+  /// An integer type.
+  TK_Integer = 0x0000,
+  /// A floating-point type.
+  TK_Float = 0x0001,
+  /// An _BitInt(N) type.
+  TK_BitInt = 0x0002,
+  /// Any other type. The value representation is unspecified.
+  TK_Unknown = 0xffff
+};
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3298,22 +3315,40 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
 ///   { i16 TypeKind, i16 TypeInfo }
 /// \endcode
 ///
-/// followed by an array of i8 containing the type name. TypeKind is 0 for an
-/// integer, 1 for a floating point value, and -1 for anything else.
+/// followed by an array of i8 containing the type name with extra information
+/// for BitInt. TypeKind is TK_Integer(0) for an integer, TK_Float(1) for a
+/// floating point value, TK_BitInt(2) for BitInt and TK_Unknown(0xFFFF) for
+/// anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
   if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
     return C;
 
-  uint16_t TypeKind = -1;
+  uint16_t TypeKind = TK_Unknown;
   uint16_t TypeInfo = 0;
+  bool IsBitInt = false;
 
   if (T->isIntegerType()) {
-    TypeKind = 0;
+    TypeKind = TK_Integer;
     TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) |
                (T->isSignedIntegerType() ? 1 : 0);
+    // Follow suggestion from discussion of issue 64100.
+    // So we can write the exact amount of bits in TypeName after '\0'
+    // making it <diagnostic-like type name>.'\0'.<32-bit width>.
+    if (T->isSignedIntegerType() && T->getAs<BitIntType>()) {
+      // Do a sanity checks as we are using 32-bit type to store bit length.
+      assert(getContext().getTypeSize(T) > 0 &&
+             " non positive amount of bits in __BitInt type");
+      assert(getContext().getTypeSize(T) <= 0xFFFFFFFF &&
+             " too many bits in __BitInt type");
+
+      // Redefine TypeKind with the actual __BitInt type if we have signed
+      // BitInt.
+      TypeKind = TK_BitInt;
+      IsBitInt = true;
+    }
   } else if (T->isFloatingType()) {
-    TypeKind = 1;
+    TypeKind = TK_Float;
     TypeInfo = getContext().getTypeSize(T);
   }
 
@@ -3324,6 +3359,20 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
       DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(),
       StringRef(), std::nullopt, Buffer, std::nullopt);
 
+  if (IsBitInt) {
+    // The Structure is: 0 to end the string, 32 bit unsigned integer in target
+    // endianness, zero.
+    char S[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
+    const auto *EIT = T->castAs<BitIntType>();
+    uint32_t Bits = EIT->getNumBits();
+    llvm::support::endian::write32(S + 1, Bits,
+                                   getTarget().isBigEndian()
+                                       ? llvm::endianness::big
+                                       : llvm::endianness::little);
+    StringRef Str = StringRef(S, sizeof(S) / sizeof(decltype(S[0])));
+    Buffer.append(Str);
+  }
+
   llvm::Constant *Components[] = {
     Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo),
     llvm::ConstantDataArray::getString(getLLVMContext(), Buffer)

diff --git a/clang/test/CodeGen/bit-int-ubsan.c b/clang/test/CodeGen/bit-int-ubsan.c
@@ -0,0 +1,96 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O0 -S -emit-llvm -o - %s | FileCheck %s
+
+// The runtime test checking the _BitInt ubsan feature is located in compiler-rt/test/ubsan/TestCases/Integer/bit-int.c
+
+#include <stdint.h>
+#include <stdio.h>
+
+uint32_t float_divide_by_zero() {
+  float f = 1.0f / 0.0f;
+  // CHECK: constant { i16, i16, [8 x i8] } { i16 1, i16 32, [8 x i8] c"'float'\00" }
+  _BitInt(37) r = (_BitInt(37))f;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(37)'\00%\00\00\00\00\00" }
+  return r;
+}
+
+uint32_t integer_divide_by_zero() __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) x = 1 / 0;
+  // CHECK: constant { i16, i16, [32 x i8] } { i16 0, i16 10, [32 x i8] c"'uint32_t' (aka 'unsigned int')\00" }
+  return x;
+}
+
+uint32_t implicit_unsigned_integer_truncation() {
+  unsigned _BitInt(37) x = 2U;
+  x += float_divide_by_zero();
+  x += integer_divide_by_zero();
+  x = x + 0xFFFFFFFFFFFFFFFFULL;
+  // CHECK: constant { i16, i16, [23 x i8] } { i16 0, i16 12, [23 x i8] c"'unsigned _BitInt(37)'\00" }
+  uint32_t r = x & 0xFFFFFFFF;
+  return r;
+}
+
+uint32_t array_bounds() {
+  _BitInt(37) x[4];
+  _BitInt(37) y = x[10];
+  // CHECK: constant { i16, i16, [17 x i8] } { i16 -1, i16 0, [17 x i8] c"'_BitInt(37)[4]'\00" }
+  return (uint32_t)y;
+}
+
+uint32_t float_cast_overflow() {
+  float a = 100000000.0f;
+  _BitInt(7) b = (_BitInt(7))a;
+  // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 7, [19 x i8] c"'_BitInt(7)'\00\07\00\00\00\00\00" }
+  return b;
+}
+
+_BitInt(13) implicit_signed_integer_truncation() {
+  _BitInt(73) x = (_BitInt(73)) ~((~0UL) >> 1);
+  return x;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(73)'\00I\00\00\00\00\00" }
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 9, [20 x i8] c"'_BitInt(13)'\00\0D\00\00\00\00\00" }
+}
+
+uint32_t negative_shift1(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(9) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [19 x i8] } { i16 2, i16 9, [19 x i8] c"'_BitInt(9)'\00\09\00\00\00\00\00" }
+}
+
+uint32_t negative_shift2(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(17) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 11, [20 x i8] c"'_BitInt(17)'\00\11\00\00\00\00\00" }
+}
+
+uint32_t negative_shift3(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(34) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 13, [20 x i8] c"'_BitInt(34)'\00\22\00\00\00\00\00" }
+}
+
+uint32_t negative_shift5(unsigned _BitInt(37) x)
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(68) c = -2;
+  return x >> c;
+  // CHECK: constant { i16, i16, [20 x i8] } { i16 2, i16 {{([[:xdigit:]]{2})}}, [20 x i8] c"'_BitInt(68)'\00D\00\00\00\00\00" }
+}
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      implicit_unsigned_integer_truncation() +
+      (uint32_t)array_bounds() +
+      float_cast_overflow() +
+      (uint64_t)implicit_signed_integer_truncation() +
+      negative_shift1(5) +
+      negative_shift2(5) +
+      negative_shift3(5) +
+      negative_shift5(5);
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}
diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp
@@ -67,18 +67,21 @@ const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
 
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
+  // Val was zero-extended to ValueHandle. Sign-extend from original width
+  // to SIntMax.
+  const unsigned ExtraBits =
+      sizeof(SIntMax) * 8 - getType().getIntegerBitCount();
   if (isInlineInt()) {
-    // Val was zero-extended to ValueHandle. Sign-extend from original width
-    // to SIntMax.
-    const unsigned ExtraBits =
-      sizeof(SIntMax) * 8 - getType().getIntegerBitWidth();
     return SIntMax(UIntMax(Val) << ExtraBits) >> ExtraBits;
   }
-  if (getType().getIntegerBitWidth() == 64)
-    return *reinterpret_cast<s64*>(Val);
+  if (getType().getIntegerBitWidth() == 64) {
+    return SIntMax(UIntMax(*reinterpret_cast<s64 *>(Val)) << ExtraBits) >>
+           ExtraBits;
+  }
 #if HAVE_INT128_T
   if (getType().getIntegerBitWidth() == 128)
-    return *reinterpret_cast<s128*>(Val);
+    return SIntMax(UIntMax(*reinterpret_cast<s128 *>(Val)) << ExtraBits) >>
+           ExtraBits;
 #else
   if (getType().getIntegerBitWidth() == 128)
     UNREACHABLE("libclang_rt.ubsan was built without __int128 support");

diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -103,6 +103,13 @@ class TypeDescriptor {
     /// representation is that of bitcasting the floating-point value to an
     /// integer type.
     TK_Float = 0x0001,
+    /// An _BitInt(N) type. Lowest bit is 1 for a signed value, 0 for an
+    /// unsigned value. Remaining bits are log_2(bit_width). The value
+    /// representation is the integer itself if it fits into a ValueHandle, and
+    /// a pointer to the integer otherwise. TypeName contains the true width
+    /// of the type for the signed _BitInt(N) type stored after zero bit after
+    /// TypeName as 32-bit unsigned integer.
+    TK_BitInt = 0x0002,
     /// Any other type. The value representation is unspecified.
     TK_Unknown = 0xffff
   };
@@ -113,10 +120,15 @@ class TypeDescriptor {
     return static_cast<Kind>(TypeKind);
   }
 
-  bool isIntegerTy() const { return getKind() == TK_Integer; }
+  bool isIntegerTy() const {
+    return getKind() == TK_Integer || getKind() == TK_BitInt;
+  }
+  bool isBitIntTy() const { return getKind() == TK_BitInt; }
+
   bool isSignedIntegerTy() const {
     return isIntegerTy() && (TypeInfo & 1);
   }
+  bool isSignedBitIntTy() const { return isBitIntTy() && (TypeInfo & 1); }
   bool isUnsignedIntegerTy() const {
     return isIntegerTy() && !(TypeInfo & 1);
   }
@@ -125,6 +137,25 @@ class TypeDescriptor {
     return 1 << (TypeInfo >> 1);
   }
 
+  const char *getBitIntBitCountPointer() const {
+    DCHECK(isBitIntTy());
+    DCHECK(isSignedBitIntTy());
+    // Scan Name for zero and return the next address
+    const char *p = getTypeName();
+    while (*p != '\0')
+      ++p;
+    // Return the next address
+    return p + 1;
+  }
+
+  unsigned getIntegerBitCount() const {
+    DCHECK(isIntegerTy());
+    if (isSignedBitIntTy())
+      return *reinterpret_cast<const u32 *>(getBitIntBitCountPointer());
+    else
+      return getIntegerBitWidth();
+  }
+
   bool isFloatTy() const { return getKind() == TK_Float; }
   unsigned getFloatBitWidth() const {
     CHECK(isFloatTy());

diff --git a/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c b/compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c
@@ -0,0 +1,42 @@
+// RUN: %clang -Wno-constant-conversion -Wno-array-bounds -Wno-division-by-zero -Wno-shift-negative-value -Wno-shift-count-negative -Wno-int-to-pointer-cast -O0 -fsanitize=alignment,array-bounds,bool,float-cast-overflow,implicit-integer-sign-change,implicit-signed-integer-truncation,implicit-unsigned-integer-truncation,integer-divide-by-zero,nonnull-attribute,null,nullability-arg,nullability-assign,nullability-return,pointer-overflow,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,unsigned-integer-overflow,unsigned-shift-base,vla-bound %s -o %t1 && %run %t1 2>&1 | FileCheck %s
+
+#include <stdint.h>
+#include <stdio.h>
+
+// In this test there is an expectation of assignment of _BitInt not producing any output.
+uint32_t nullability_arg(_BitInt(37) *_Nonnull x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(37) y = *(_BitInt(37) *)&x;
+  return (y > 0) ? y : 0;
+}
+
+// In this test there is an expectation of ubsan not triggeting on returning random address which is inside address space of the process.
+_BitInt(37) nonnull_attribute(__attribute__((nonnull)) _BitInt(37) * x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  return *(_BitInt(37) *)&x;
+}
+
+// In this test there is an expectation of assignment of uint32_t from "invalid" _BitInt is not producing any output.
+uint32_t nullability_assign(_BitInt(7) * x)
+    __attribute__((no_sanitize("address")))
+    __attribute__((no_sanitize("memory"))) {
+  _BitInt(7) *_Nonnull y = x;
+  int32_t r = *(_BitInt(7) *)&y;
+  return (r > 0) ? r : 0;
+}
+
+// In those examples the file is expected to compile & run with no diagnostics
+// CHECK-NOT: runtime error:
+
+int main(int argc, char **argv) {
+  // clang-format off
+  uint64_t result =
+      1ULL +
+      nullability_arg((_BitInt(37) *)argc) +
+      ((uint64_t)nonnull_attribute((_BitInt(37) *)argc) & 0xFFFFFFFF) +
+      nullability_assign((_BitInt(7) *)argc);
+  // clang-format on
+  printf("%u\n", (uint32_t)(result & 0xFFFFFFFF));
+}