
Commit 5b88e6e

Revert "add the torch.float8_e8m0fnu dtype to PyTorch (pytorch#147466)"
This reverts commit 382fbcc.
1 parent 4708cfd · commit 5b88e6e

25 files changed: +44 −535 lines
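The user-visible effect of the revert, in a minimal sketch (hypothetical caller code, not part of this commit; assumes a build of this tree): ScalarType::Float8_e8m0fnu is removed from c10, so isFloat8Type() recognizes only the four remaining float8 dtypes, and any code that names the e8m0 type stops compiling.

// Hypothetical caller sketch -- not part of this commit.
#include <c10/core/ScalarType.h>
#include <iostream>

int main() {
  using c10::ScalarType;
  // The four float8 dtypes that survive the revert:
  std::cout << c10::isFloat8Type(ScalarType::Float8_e5m2) << "\n";   // 1
  std::cout << c10::isFloat8Type(ScalarType::Float8_e4m3fn) << "\n"; // 1
  std::cout << c10::isFloat8Type(ScalarType::BFloat16) << "\n";      // 0
  // c10::isFloat8Type(ScalarType::Float8_e8m0fnu);  // error: no such enumerator after the revert
  return 0;
}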

aten/src/ATen/DLConvertor.cpp

Lines changed: 0 additions & 2 deletions
@@ -63,12 +63,10 @@ DLDataType getDLDataType(const Tensor& t) {
     case ScalarType::BFloat16:
       dtype.code = DLDataTypeCode::kDLBfloat;
       break;
-    // TODO(#146647): use macro here instead of spelling out each shell dtype
     case ScalarType::Float8_e5m2:
     case ScalarType::Float8_e5m2fnuz:
     case ScalarType::Float8_e4m3fn:
     case ScalarType::Float8_e4m3fnuz:
-    case ScalarType::Float8_e8m0fnu:
       TORCH_CHECK(false, "float8 types are not supported by dlpack");
       break;
     case ScalarType::QInt8:

aten/src/ATen/Dispatch_v2.h

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@
 
 #define AT_FLOAT8_TYPES \
   c10::kFloat8_e5m2, c10::kFloat8_e5m2fnuz, c10::kFloat8_e4m3fn, \
-      c10::kFloat8_e4m3fnuz, c10::kFloat8_e8m0fnu
+      c10::kFloat8_e4m3fnuz
 
 #define AT_INTEGRAL_TYPES \
   c10::kByte, c10::kChar, c10::kInt, c10::kLong, c10::kShort
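For context, a minimal sketch of how this macro is consumed (modeled on the _AT_DISPATCH_* macros in CopyKernel.cpp further down; the op name "example_op", the TensorIterator `iter`, and the identity lambda are placeholders): AT_EXPAND splices the list into an AT_DISPATCH_V2 call, so with the reverted definition no Float8_e8m0fnu kernel is instantiated.

// Sketch only -- placeholder op name and iterator; assumes the usual
// TensorIterator/cpu_kernel machinery from ATen is in scope.
AT_DISPATCH_V2(iter.dtype(), "example_op", AT_WRAP([&]() {
  // scalar_t is instantiated once per dtype spliced in by AT_EXPAND:
  // Float8_e5m2, Float8_e5m2fnuz, Float8_e4m3fn, Float8_e4m3fnuz.
  cpu_kernel(iter, [](scalar_t x) { return x; });
}), AT_EXPAND(AT_FLOAT8_TYPES));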

aten/src/ATen/native/Copy.cpp

Lines changed: 2 additions & 2 deletions
@@ -59,8 +59,8 @@ bool copy_transpose_valid(const Tensor& self, const Tensor& src) {
 #if !defined(C10_MOBILE)
 #define _AT_DISPATCH_CP_TYPES(TYPE, NAME, ...) \
   AT_DISPATCH_V2( \
-      TYPE, NAME, AT_WRAP(__VA_ARGS__), kComplexHalf, kHalf, kBool, kBFloat16, \
-      AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
+      TYPE, NAME, AT_WRAP(__VA_ARGS__), kComplexHalf, kHalf, kBool, kBFloat16, kFloat8_e5m2, \
+      kFloat8_e4m3fn, kFloat8_e5m2fnuz, kFloat8_e4m3fnuz, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
 #else
 #define _AT_DISPATCH_CP_TYPES(TYPE, NAME, ...) \
   AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4( \

aten/src/ATen/native/TensorCompare.cpp

Lines changed: 1 addition & 2 deletions
@@ -460,8 +460,7 @@ Tensor isinf(const Tensor& self) {
 
 Tensor isfinite(const Tensor& self) {
   // Note: Integral tensor values are always finite
-  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/true) ||
-      self.scalar_type() == kFloat8_e8m0fnu) {
+  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/true)) {
     return at::ones_like(self, at::kBool, at::MemoryFormat::Preserve);
   }
 
aten/src/ATen/native/cpu/CopyKernel.cpp

Lines changed: 4 additions & 4 deletions
@@ -204,12 +204,12 @@ static void reduced_float_copy_kernel(TensorIteratorBase &iter, bool requires_ne
 #define _AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \
   AT_DISPATCH_V2(TYPE, NAME, AT_WRAP(__VA_ARGS__), \
       kComplexHalf, kHalf, kBool, \
-      kBFloat16, AT_EXPAND(AT_FLOAT8_TYPES), \
-      AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
+      kBFloat16, kFloat8_e5m2, kFloat8_e4m3fn, \
+      kFloat8_e5m2fnuz, kFloat8_e4m3fnuz, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
 #define _AT_DISPATCH_ALL_TYPES_NO_CF(TYPE, NAME, ...) \
   AT_DISPATCH_V2(TYPE, NAME, AT_WRAP(__VA_ARGS__), \
-      kBool, kHalf, kBFloat16, AT_EXPAND(AT_FLOAT8_TYPES), \
-      AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
+      kBool, kHalf, kBFloat16, kFloat8_e5m2, kFloat8_e4m3fn, \
+      kFloat8_e5m2fnuz, kFloat8_e4m3fnuz, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES))
 #else
 #define _AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \
   AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4( \

aten/src/ATen/native/cpu/FillKernel.cpp

Lines changed: 0 additions & 3 deletions
@@ -51,9 +51,6 @@ void fill_kernel(TensorIterator& iter, const Scalar& value_scalar) {
     fill_non_native_type<at::Float8_e4m3fnuz>(iter, value_scalar);
   } else if (iter.dtype() == ScalarType::Float8_e5m2fnuz) {
     fill_non_native_type<at::Float8_e5m2fnuz>(iter, value_scalar);
-  } else if (iter.dtype() == ScalarType::Float8_e8m0fnu) {
-    // TODO(#146647): use macro here instead of spelling out each float8 dtype
-    fill_non_native_type<at::Float8_e8m0fnu>(iter, value_scalar);
   } else {
     AT_DISPATCH_V2(
         iter.dtype(), "fill_cpu", AT_WRAP([&]() {

aten/src/ATen/native/cpu/IndexKernel.cpp

Lines changed: 1 addition & 7 deletions
@@ -184,13 +184,7 @@ void index_put_kernel(TensorIterator& iter, IntArrayRef index_size, IntArrayRef
       }
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,

aten/src/ATen/native/cuda/Copy.cu

Lines changed: 1 addition & 23 deletions
@@ -144,28 +144,6 @@ void float8_copy_kernel_cuda(TensorIteratorBase &iter) {
         gpu_kernel(iter, [] GPU_LAMBDA(Float8_e5m2fnuz x) { return x; });
         break;
     }
-  } else if (dtype == kFloat8_e8m0fnu) {
-    // TODO(#146647): clean this up, too much copy-pasta
-    switch (other_dtype) {
-      case kFloat:
-        gpu_kernel_nocast(iter, [] GPU_LAMBDA(float value) {
-          return Float8_e8m0fnu(value);
-        });
-        break;
-      case kHalf:
-        gpu_kernel_nocast(iter, [] GPU_LAMBDA(Half value) {
-          return Float8_e8m0fnu(value);
-        });
-        break;
-      case kBFloat16:
-        gpu_kernel_nocast(iter, [] GPU_LAMBDA(BFloat16 value) {
-          return Float8_e8m0fnu(value);
-        });
-        break;
-      default:
-        gpu_kernel(iter, [] GPU_LAMBDA(Float8_e8m0fnu x) { return x; });
-        break;
-    }
   } else {
     TORCH_CHECK(false, "This supposed ot be called only for Float8 types");
   }
@@ -179,7 +157,7 @@ void direct_copy_kernel_cuda(TensorIteratorBase &iter) {
     AT_DISPATCH_QINT_TYPES(dtype, "copy_", [&] {
       gpu_kernel(iter, [] GPU_LAMBDA(scalar_t x) { return x; });
     });
-  } else if (isFloat8Type(dtype)) {
+  } else if (dtype == kFloat8_e5m2 || dtype == kFloat8_e4m3fn || dtype == kFloat8_e5m2fnuz || dtype == kFloat8_e4m3fnuz) {
     float8_copy_kernel_cuda(iter);
   } else if (iter.dtype(1) == kFloat && (dtype == kBFloat16 || dtype == kHalf)) {
     if (dtype == kBFloat16) {

aten/src/ATen/native/cuda/Indexing.cu

Lines changed: 5 additions & 35 deletions
@@ -708,13 +708,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Ten
       C10_CUDA_KERNEL_LAUNCH_CHECK();
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,
@@ -740,13 +734,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Ten
       C10_CUDA_KERNEL_LAUNCH_CHECK();
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,
@@ -770,13 +758,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Ten
      C10_CUDA_KERNEL_LAUNCH_CHECK();
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,
@@ -798,13 +780,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Ten
       C10_CUDA_KERNEL_LAUNCH_CHECK();
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,
@@ -829,13 +805,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List<std::optional<Ten
       C10_CUDA_KERNEL_LAUNCH_CHECK();
     }),
     AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX),
-    // AT_EXPAND(AT_FLOAT8_TYPES),
-    // TODO(#113663): clean up accumulation behavior in float8 dtypes, accumulate=True
-    // should not be supported here, then reenable AT_FLOAT8_DTYPES
-    kFloat8_e4m3fn,
-    kFloat8_e5m2,
-    kFloat8_e4m3fnuz,
-    kFloat8_e5m2fnuz,
+    AT_EXPAND(AT_FLOAT8_TYPES),
     kComplexHalf,
     kHalf,
     kBool,

aten/src/ATen/native/cuda/jit_utils.h

Lines changed: 0 additions & 4 deletions
@@ -228,10 +228,6 @@ template <> inline std::string typeName<at::Float8_e5m2fnuz>() {
 template <> inline std::string typeName<at::Float8_e4m3fnuz>() {
   return "at::Float8_e4m3fnuz";
 }
-template <> inline std::string typeName<at::Float8_e8m0fnu>() {
-  // TODO(#146647): Can the code here be made generic for any scalartype?
-  return "at::Float8_e8m0fnu";
-}
 
 #define TYPE_NAME_CASE(ctype, scalartype) \
   case ScalarType::scalartype: return typeName<ctype>();

c10/core/Scalar.h

Lines changed: 9 additions & 2 deletions
@@ -49,9 +49,16 @@ class C10_API Scalar {
 #define DEFINE_IMPLICIT_CTOR(type, name) \
   Scalar(type vv) : Scalar(vv, true) {}
 
-  AT_FORALL_SCALAR_TYPES_AND3(Half, BFloat16, ComplexHalf, DEFINE_IMPLICIT_CTOR)
+  AT_FORALL_SCALAR_TYPES_AND7(
+      Half,
+      BFloat16,
+      Float8_e5m2,
+      Float8_e4m3fn,
+      Float8_e5m2fnuz,
+      Float8_e4m3fnuz,
+      ComplexHalf,
+      DEFINE_IMPLICIT_CTOR)
   AT_FORALL_COMPLEX_TYPES(DEFINE_IMPLICIT_CTOR)
-  AT_FORALL_FLOAT8_TYPES(DEFINE_IMPLICIT_CTOR)
 
   // Helper constructors to allow Scalar creation from long and long long types
   // As std::is_same_v<long, long long> is false(except Android), one needs to
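AT_FORALL_SCALAR_TYPES_AND7 is an X-macro: it invokes DEFINE_IMPLICIT_CTOR once per listed type, stamping out one implicit Scalar constructor each. A standalone toy of the same pattern (illustrative names, not PyTorch's):

// Toy X-macro, mirroring how Scalar.h stamps out DEFINE_IMPLICIT_CTOR:
// the FORALL macro invokes _() once per (type, name) pair.
#include <cstring>

#define FORALL_DEMO_TYPES(_) \
  _(int, Int)                \
  _(float, Float)            \
  _(double, Double)

struct Value {
#define DEFINE_DEMO_CTOR(type, name) \
  Value(type) : kind(#name) {}
  FORALL_DEMO_TYPES(DEFINE_DEMO_CTOR) // generates Value(int), Value(float), Value(double)
#undef DEFINE_DEMO_CTOR
  const char* kind;
};

int main() {
  Value v(3.0f);                       // selects the Value(float) overload
  return std::strcmp(v.kind, "Float"); // 0 when the expected ctor ran
}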

c10/core/ScalarType.cpp

Lines changed: 0 additions & 3 deletions
@@ -222,9 +222,6 @@ std::pair<std::string, std::string> getDtypeNames(c10::ScalarType scalarType) {
       return std::make_pair("float8_e5m2fnuz", "");
     case c10::ScalarType::Float8_e4m3fnuz:
       return std::make_pair("float8_e4m3fnuz", "");
-    case c10::ScalarType::Float8_e8m0fnu:
-      // TODO(#146647): macroify all of this
-      return std::make_pair("float8_e8m0fnu", "");
     default:
       throw std::runtime_error("Unimplemented scalar type");
   }

c10/core/ScalarType.h

Lines changed: 3 additions & 19 deletions
@@ -7,7 +7,6 @@
 #include <c10/util/Float8_e4m3fnuz.h>
 #include <c10/util/Float8_e5m2.h>
 #include <c10/util/Float8_e5m2fnuz.h>
-#include <c10/util/Float8_e8m0fnu.h>
 #include <c10/util/Half.h>
 #include <c10/util/bits.h>
 #include <c10/util/complex.h>
@@ -103,8 +102,7 @@ struct dummy_int1_7_t {};
   _(c10::dummy_int1_7_t<4>, Int4) /* 40 */ \
   _(c10::dummy_int1_7_t<5>, Int5) /* 41 */ \
   _(c10::dummy_int1_7_t<6>, Int6) /* 42 */ \
-  _(c10::dummy_int1_7_t<7>, Int7) /* 43 */ \
-  _(c10::Float8_e8m0fnu, Float8_e8m0fnu) /* 44 */
+  _(c10::dummy_int1_7_t<7>, Int7) /* 43 */
 
 // If you want to support ComplexHalf for real, add ComplexHalf
 // into this macro (and change the name). But beware: convert()
@@ -148,8 +146,7 @@ struct dummy_int1_7_t {};
   _(at::Float8_e5m2, Float8_e5m2) \
   _(at::Float8_e4m3fn, Float8_e4m3fn) \
   _(at::Float8_e5m2fnuz, Float8_e5m2fnuz) \
-  _(at::Float8_e4m3fnuz, Float8_e4m3fnuz) \
-  _(at::Float8_e8m0fnu, Float8_e8m0fnu)
+  _(at::Float8_e4m3fnuz, Float8_e4m3fnuz)
 
 enum class ScalarType : int8_t {
 #define DEFINE_ST_ENUM_VAL_(_1, n) n,
@@ -320,13 +317,6 @@ AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_AND_QINTS(SPECIALIZE_CppTypeToScalarType)
   _(c10::quint4x2, QUInt4x2) \
   _(c10::quint2x4, QUInt2x4)
 
-#define AT_FORALL_FLOAT8_TYPES(_) \
-  _(at::Float8_e5m2, Float8_e5m2) \
-  _(at::Float8_e4m3fn, Float8_e4m3fn) \
-  _(at::Float8_e5m2fnuz, Float8_e5m2fnuz) \
-  _(at::Float8_e4m3fnuz, Float8_e4m3fnuz) \
-  _(at::Float8_e8m0fnu, Float8_e8m0fnu)
-
 #define AT_FORALL_COMPLEX_TYPES(_) \
   _(c10::complex<float>, ComplexFloat) \
   _(c10::complex<double>, ComplexDouble)
@@ -382,8 +372,7 @@ inline bool isIntegralType(ScalarType t) {
 
 inline bool isFloat8Type(ScalarType t) {
   return t == ScalarType::Float8_e5m2 || t == ScalarType::Float8_e5m2fnuz ||
-      t == ScalarType::Float8_e4m3fn || t == ScalarType::Float8_e4m3fnuz ||
-      t == ScalarType::Float8_e8m0fnu;
+      t == ScalarType::Float8_e4m3fn || t == ScalarType::Float8_e4m3fnuz;
 }
 
 inline bool isReducedFloatingType(ScalarType t) {
@@ -457,10 +446,6 @@ inline bool isSignedType(ScalarType t) {
       return std::numeric_limits< \
           ::c10::impl::ScalarTypeToCPPTypeT<ScalarType::name>>::is_signed;
 
-  // TODO(#146647): If we expect to have numeric_limits for everything,
-  // let's just have a big macro for the whole thing.
-  // If we're hardcoding it, let's just use the macro and a "true"/"false"
-  // below?
   switch (t) {
     case ScalarType::QInt8:
     case ScalarType::QUInt8:
@@ -482,7 +467,6 @@ inline bool isSignedType(ScalarType t) {
     CASE_ISSIGNED(Float8_e5m2fnuz);
     CASE_ISSIGNED(Float8_e4m3fn);
     CASE_ISSIGNED(Float8_e4m3fnuz);
-    CASE_ISSIGNED(Float8_e8m0fnu);
     CASE_ISSIGNED(Byte);
     CASE_ISSIGNED(Char);
     CASE_ISSIGNED(Short);
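Each CASE_ISSIGNED(name) in the final hunk expands to a case label that returns std::numeric_limits<T>::is_signed for the dtype's backing C++ type, so the revert simply drops the e8m0 case from isSignedType. A simplified standalone sketch of that pattern (toy enum and type mapping, not c10's):

// Standalone sketch of the CASE_ISSIGNED pattern from ScalarType.h
// (simplified enum and mapping; not PyTorch's actual definitions).
#include <cstdint>
#include <limits>

enum class Kind { Byte, Char, Short };

template <Kind K> struct KindToCpp;
template <> struct KindToCpp<Kind::Byte>  { using type = uint8_t; };
template <> struct KindToCpp<Kind::Char>  { using type = int8_t; };
template <> struct KindToCpp<Kind::Short> { using type = int16_t; };

#define CASE_IS_SIGNED(name) \
  case Kind::name:           \
    return std::numeric_limits<KindToCpp<Kind::name>::type>::is_signed;

bool isSigned(Kind k) {
  switch (k) {
    CASE_IS_SIGNED(Byte);  // false: uint8_t is unsigned
    CASE_IS_SIGNED(Char);  // true:  int8_t is signed
    CASE_IS_SIGNED(Short); // true:  int16_t is signed
  }
  return false;
}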
