
Commit d4aaad3

[API] Fix int overflow and float16 support for paddle.frac (#72815)

Parent: 7869937

File tree: 2 files changed (+11, -9 lines)

  paddle/phi/kernels/gpu/trunc_kernel.cu
  python/paddle/tensor/math.py

paddle/phi/kernels/gpu/trunc_kernel.cu (5 additions, 6 deletions)
@@ -13,9 +13,9 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/trunc_kernel.h"
-
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -59,7 +59,7 @@ class TruncFunctor<int64_t> {
 
 template <typename T>
 __global__ void Trunc(const T* x, T* out, int64_t N) {
-  CUDA_KERNEL_LOOP(index, N) {
+  CUDA_KERNEL_LOOP_TYPE(index, N, int64_t) {
     TruncFunctor<T> functor(x[index]);
     out[index] = functor();
   }
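
Why the loop macro changed: CUDA_KERNEL_LOOP iterates with a 32-bit int index, so for tensors with more than 2^31 - 1 elements the index overflows; CUDA_KERNEL_LOOP_TYPE(index, N, int64_t) keeps the index 64-bit. A simplified sketch of the pattern (illustrative only, not Paddle's exact macro expansion):

    #include <cstdint>

    // 64-bit grid-stride loop: the pattern CUDA_KERNEL_LOOP_TYPE expands to.
    __global__ void TruncSketch(const float* x, float* out, int64_t n) {
      // Compute the start index in 64-bit; a plain `int` index wraps around
      // once n exceeds INT_MAX (2^31 - 1 elements), corrupting the indexing.
      int64_t start = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
      int64_t stride = static_cast<int64_t>(gridDim.x) * blockDim.x;
      for (int64_t i = start; i < n; i += stride) {
        out[i] = truncf(x[i]);  // drop the fractional part of each element
      }
    }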
@@ -73,11 +73,10 @@ void TruncKernel(const Context& dev_ctx,
   auto* out_data = dev_ctx.template Alloc<T>(out);
 
   int64_t numel = x.numel();
+  auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, numel);
 
-  int threads = PADDLE_CUDA_NUM_THREADS;
-  int blocks = (numel + threads - 1) / threads;
-
-  Trunc<<<blocks, threads>>>(x_data, out_data, numel);
+  Trunc<<<config.block_per_grid, config.thread_per_block>>>(
+      x_data, out_data, numel);
 }
 
 }  // namespace phi
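
Why the launch sizing changed: the removed code stored the block count in a 32-bit int, which truncates for very large element counts; GetGpuLaunchConfig1D sizes the grid safely from the device limits. A minimal sketch of overflow-safe 1-D launch sizing (the names LaunchConfig1D / MakeLaunchConfig1D and the 512-thread default are illustrative assumptions, not Paddle's GetGpuLaunchConfig1D API):

    #include <algorithm>
    #include <cstdint>

    struct LaunchConfig1D {
      int64_t block_per_grid;
      int thread_per_block;
    };

    LaunchConfig1D MakeLaunchConfig1D(int64_t numel, int threads = 512) {
      // Ceil-divide in 64-bit; narrowing the result into a 32-bit int (as
      // the removed code did) silently truncates for huge element counts.
      int64_t blocks = (numel + threads - 1) / threads;
      // gridDim.x is capped at 2^31 - 1; the grid-stride loop inside the
      // kernel picks up any elements beyond blocks * threads.
      blocks = std::min<int64_t>(blocks, INT32_MAX);
      return {blocks, threads};
    }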

python/paddle/tensor/math.py (6 additions, 3 deletions)
@@ -6946,13 +6946,15 @@ def frac(x: Tensor, name: str | None = None) -> Tensor:
         paddle.int64,
         paddle.float32,
         paddle.float64,
+        paddle.float16,
         DataType.INT32,
         DataType.INT64,
         DataType.FLOAT32,
         DataType.FLOAT64,
+        DataType.FLOAT16,
     ]:
         raise TypeError(
-            f"The data type of input must be one of ['int32', 'int64', 'float32', 'float64'], but got {x.dtype}"
+            f"The data type of input must be one of ['int32', 'int64', 'float32', 'float64', 'float16'], but got {x.dtype}"
         )
     if in_dynamic_or_pir_mode():
         y = _C_ops.trunc(x)
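
With float16 added to the allowed dtypes, frac now accepts half-precision tensors. A usage sketch, assuming a build that contains this change (frac computes x - trunc(x)):

    import paddle

    # frac keeps only the fractional part: frac(x) = x - trunc(x)
    x = paddle.to_tensor([1.5, -2.25, 3.0], dtype='float16')
    print(paddle.frac(x))  # values [0.5, -0.25, 0.0], dtype float16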
@@ -6963,7 +6965,7 @@ def frac(x: Tensor, name: str | None = None) -> Tensor:
 
     helper = LayerHelper("trunc", **locals())
     check_variable_and_dtype(
-        x, "X", ['int32', 'int64', 'float32', 'float64'], 'trunc'
+        x, "X", ['int32', 'int64', 'float32', 'float64', 'float16'], 'trunc'
     )
     y = helper.create_variable_for_type_inference(dtype=x.dtype)
     helper.append_op(
@@ -6984,9 +6986,10 @@ def frac_(x: Tensor, name: str | None = None) -> Tensor:
         paddle.int64,
         paddle.float32,
         paddle.float64,
+        paddle.float16,
     ]:
         raise TypeError(
-            f"The data type of input must be one of ['int32', 'int64', 'float32', 'float64'], but got {x.dtype}"
+            f"The data type of input must be one of ['int32', 'int64', 'float32', 'float64', 'float16'], but got {x.dtype}"
         )
     if in_dynamic_mode():
         y = _C_ops.trunc(x)
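
frac_ is the in-place variant and gets the same float16 allowance. A usage sketch, under the same build assumption:

    import paddle

    x = paddle.to_tensor([1.5, -2.25], dtype='float16')
    paddle.frac_(x)  # mutates x in place instead of allocating a new tensor
    print(x)         # values [0.5, -0.25]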
