GPUOpen-Drivers
diff --git a/‎clang/include/clang/Basic/RISCVVTypes.def
Lines changed: 56 additions & 0 deletions b/‎clang/include/clang/Basic/RISCVVTypes.def
Lines changed: 56 additions & 0 deletions
diff --git a/‎clang/include/clang/Serialization/ASTBitCodes.h
Lines changed: 1 addition & 1 deletion b/‎clang/include/clang/Serialization/ASTBitCodes.h
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/include/clang/Support/RISCVVIntrinsicUtils.h
Lines changed: 3 additions & 2 deletions b/‎clang/include/clang/Support/RISCVVIntrinsicUtils.h
Lines changed: 3 additions & 2 deletions
diff --git a/‎clang/lib/Support/RISCVVIntrinsicUtils.cpp
Lines changed: 3 additions & 0 deletions b/‎clang/lib/Support/RISCVVIntrinsicUtils.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/‎clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
Lines changed: 55 additions & 0 deletions b/‎clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
Lines changed: 55 additions & 0 deletions
diff --git a/‎clang/utils/TableGen/RISCVVEmitter.cpp
Lines changed: 3 additions & 1 deletion b/‎clang/utils/TableGen/RISCVVEmitter.cpp
Lines changed: 3 additions & 1 deletion
diff --git a/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion b/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion
@@ -452,6 +452,62 @@ RVV_VECTOR_TYPE_FLOAT("__rvv_float64m2x4_t", RvvFloat64m2x4, RvvFloat64m2x4Ty, 2
 
 RVV_VECTOR_TYPE_FLOAT("__rvv_float64m4x2_t", RvvFloat64m4x2, RvvFloat64m4x2Ty, 4, 64, 2)
 
+//===- BFloat16 tuple types -------------------------------------------------===//
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x2_t", RvvBFloat16mf4x2, RvvBFloat16mf4x2Ty,
+                       1, 16, 2)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x3_t", RvvBFloat16mf4x3, RvvBFloat16mf4x3Ty,
+                       1, 16, 3)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x4_t", RvvBFloat16mf4x4, RvvBFloat16mf4x4Ty,
+                       1, 16, 4)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x5_t", RvvBFloat16mf4x5, RvvBFloat16mf4x5Ty,
+                       1, 16, 5)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x6_t", RvvBFloat16mf4x6, RvvBFloat16mf4x6Ty,
+                       1, 16, 6)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x7_t", RvvBFloat16mf4x7, RvvBFloat16mf4x7Ty,
+                       1, 16, 7)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4x8_t", RvvBFloat16mf4x8, RvvBFloat16mf4x8Ty,
+                       1, 16, 8)
+
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x2_t", RvvBFloat16mf2x2, RvvBFloat16mf2x2Ty,
+                       2, 16, 2)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x3_t", RvvBFloat16mf2x3, RvvBFloat16mf2x3Ty,
+                       2, 16, 3)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x4_t", RvvBFloat16mf2x4, RvvBFloat16mf2x4Ty,
+                       2, 16, 4)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x5_t", RvvBFloat16mf2x5, RvvBFloat16mf2x5Ty,
+                       2, 16, 5)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x6_t", RvvBFloat16mf2x6, RvvBFloat16mf2x6Ty,
+                       2, 16, 6)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x7_t", RvvBFloat16mf2x7, RvvBFloat16mf2x7Ty,
+                       2, 16, 7)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2x8_t", RvvBFloat16mf2x8, RvvBFloat16mf2x8Ty,
+                       2, 16, 8)
+
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x2_t", RvvBFloat16m1x2, RvvBFloat16m1x2Ty,
+                       4, 16, 2)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x3_t", RvvBFloat16m1x3, RvvBFloat16m1x3Ty,
+                       4, 16, 3)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x4_t", RvvBFloat16m1x4, RvvBFloat16m1x4Ty,
+                       4, 16, 4)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x5_t", RvvBFloat16m1x5, RvvBFloat16m1x5Ty,
+                       4, 16, 5)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x6_t", RvvBFloat16m1x6, RvvBFloat16m1x6Ty,
+                       4, 16, 6)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x7_t", RvvBFloat16m1x7, RvvBFloat16m1x7Ty,
+                       4, 16, 7)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1x8_t", RvvBFloat16m1x8, RvvBFloat16m1x8Ty,
+                       4, 16, 8)
+
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x2_t", RvvBFloat16m2x2, RvvBFloat16m2x2Ty,
+                       8, 16, 2)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x3_t", RvvBFloat16m2x3, RvvBFloat16m2x3Ty,
+                       8, 16, 3)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2x4_t", RvvBFloat16m2x4, RvvBFloat16m2x4Ty,
+                       8, 16, 4)
+
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m4x2_t", RvvBFloat16m4x2, RvvBFloat16m4x2Ty,
+                       16, 16, 2)
+
 #undef RVV_VECTOR_TYPE_BFLOAT
 #undef RVV_VECTOR_TYPE_FLOAT
 #undef RVV_VECTOR_TYPE_INT
 
@@ -1101,7 +1101,7 @@ enum PredefinedTypeIDs {
 ///
 /// Type IDs for non-predefined types will start at
 /// NUM_PREDEF_TYPE_IDs.
-const unsigned NUM_PREDEF_TYPE_IDS = 500;
+const unsigned NUM_PREDEF_TYPE_IDS = 600;
 
 // Ensure we do not overrun the predefined types we reserved
 // in the enum PredefinedTypeIDs above.
 
@@ -97,13 +97,14 @@ enum class TypeModifier : uint8_t {
   UnsignedInteger = 1 << 3,
   SignedInteger = 1 << 4,
   Float = 1 << 5,
+  BFloat = 1 << 6,
   // LMUL1 should be kind of VectorTypeModifier, but that might come with
   // Widening2XVector for widening reduction.
   // However that might require VectorTypeModifier become bitmask rather than
   // simple enum, so we decide keek LMUL1 in TypeModifier for code size
   // optimization of clang binary size.
-  LMUL1 = 1 << 6,
-  MaxOffset = 6,
+  LMUL1 = 1 << 7,
+  MaxOffset = 7,
   LLVM_MARK_AS_BITMASK_ENUM(LMUL1),
 };
 
 
@@ -857,6 +857,9 @@ void RVVType::applyModifier(const PrototypeDescriptor &Transformer) {
     case TypeModifier::Float:
       ScalarType = ScalarTypeKind::Float;
       break;
+    case TypeModifier::BFloat:
+      ScalarType = ScalarTypeKind::BFloat;
+      break;
     case TypeModifier::LMUL1:
       LMUL = LMULType(0);
       // Update ElementBitwidth need to update Scale too.
 
@@ -305,6 +305,31 @@
 // CHECK-NEXT:    [[F64M2X3:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 8
 // CHECK-NEXT:    [[F64M2X4:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 8
 // CHECK-NEXT:    [[F64M4X2:%.*]] = alloca { <vscale x 4 x double>, <vscale x 4 x double> }, align 8
+// CHECK-NEXT:    [[BF16MF4X2:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X3:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X4:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X5:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X6:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X7:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF4X8:%.*]] = alloca { <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X2:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X3:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X4:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X5:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X6:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X7:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16MF2X8:%.*]] = alloca { <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X2:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X3:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X4:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X5:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X6:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X7:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M1X8:%.*]] = alloca { <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M2X2:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M2X3:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M2X4:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 2
+// CHECK-NEXT:    [[BF16M4X2:%.*]] = alloca { <vscale x 16 x bfloat>, <vscale x 16 x bfloat> }, align 2
 // CHECK-NEXT:    ret void
 //
 void foo () {
@@ -664,4 +689,34 @@ void foo () {
   vfloat64m2x4_t f64m2x4;
 
   vfloat64m4x2_t f64m4x2;
+  // bf16
+  vbfloat16mf4x2_t bf16mf4x2;
+  vbfloat16mf4x3_t bf16mf4x3;
+  vbfloat16mf4x4_t bf16mf4x4;
+  vbfloat16mf4x5_t bf16mf4x5;
+  vbfloat16mf4x6_t bf16mf4x6;
+  vbfloat16mf4x7_t bf16mf4x7;
+  vbfloat16mf4x8_t bf16mf4x8;
+
+  vbfloat16mf2x2_t bf16mf2x2;
+  vbfloat16mf2x3_t bf16mf2x3;
+  vbfloat16mf2x4_t bf16mf2x4;
+  vbfloat16mf2x5_t bf16mf2x5;
+  vbfloat16mf2x6_t bf16mf2x6;
+  vbfloat16mf2x7_t bf16mf2x7;
+  vbfloat16mf2x8_t bf16mf2x8;
+
+  vbfloat16m1x2_t bf16m1x2;
+  vbfloat16m1x3_t bf16m1x3;
+  vbfloat16m1x4_t bf16m1x4;
+  vbfloat16m1x5_t bf16m1x5;
+  vbfloat16m1x6_t bf16m1x6;
+  vbfloat16m1x7_t bf16m1x7;
+  vbfloat16m1x8_t bf16m1x8;
+
+  vbfloat16m2x2_t bf16m2x2;
+  vbfloat16m2x3_t bf16m2x3;
+  vbfloat16m2x4_t bf16m2x4;
+
+  vbfloat16m4x2_t bf16m4x2;
 }
@@ -401,7 +401,9 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
         auto TupleT = TypeCache.computeType(
             BT, Log2LMUL,
             PrototypeDescriptor(BaseTypeModifier::Vector, getTupleVTM(NF),
-                                TypeModifier::Float));
+                                (BT == BasicType::BFloat16
+                                     ? TypeModifier::BFloat
+                                     : TypeModifier::Float)));
         if (TupleT)
           printType(*TupleT);
       }
 
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 480794
+#define LLVM_MAIN_REVISION 480796
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP