Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 35 additions & 5 deletions paddle/phi/kernels/cpu/gaussian_inplace_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,49 @@ limitations under the License. */

#include "paddle/phi/kernels/gaussian_inplace_grad_kernel.h"

#include "paddle/phi/common/type_traits.h"
#include "paddle/phi/core/kernel_registry.h"

namespace phi {

// Gradient helper for real (non-complex) element types.
//
// This overload participates only when T is neither complex<float> nor
// complex<double>. It allocates x_grad through the context and sets every
// element to T(0). A null x_grad is a no-op.
template <
    typename T,
    typename Context,
    std::enable_if_t<!(std::is_same<T, phi::dtype::complex<float>>::value ||
                       std::is_same<T, phi::dtype::complex<double>>::value),
                     bool> = true>
void GaussianInplaceGrad(const Context& ctx, DenseTensor* x_grad) {
  if (!x_grad) {
    return;
  }
  auto* first = ctx.template Alloc<T>(x_grad);
  auto* last = first + x_grad->numel();
  std::fill(first, last, T(0));
}

// If T is complex
template <
typename T,
typename Context,
std::enable_if_t<std::is_same<T, phi::dtype::complex<float>>::value ||
std::is_same<T, phi::dtype::complex<double>>::value,
bool> = true>
void GaussianInplaceGrad(const Context& ctx, DenseTensor* x_grad) {
if (x_grad) {
auto* data = ctx.template Alloc<T>(x_grad);
T value = T(static_cast<phi::dtype::Real<T>>(0.0f),
static_cast<phi::dtype::Real<T>>(0.0f));
std::fill(data, data + x_grad->numel(), value);
}
}

// CPU gradient kernel for gaussian_inplace.
//
// out_grad, mean, std and seed are not read. When x_grad is non-null it is
// allocated and zero-filled by GaussianInplaceGrad<T>, which selects the real
// or complex overload from T. The zero-fill is delegated entirely to that
// helper so the buffer is written only once.
template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
                               const DenseTensor& out_grad UNUSED,
                               float mean UNUSED,
                               float std UNUSED,
                               int seed UNUSED,
                               DenseTensor* x_grad) {
  GaussianInplaceGrad<T>(ctx, x_grad);
}

} // namespace phi
Expand All @@ -38,4 +66,6 @@ PD_REGISTER_KERNEL(gaussian_inplace_grad,
ALL_LAYOUT,
phi::GaussianInplaceGradKernel,
float,
double) {}
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
13 changes: 7 additions & 6 deletions paddle/phi/kernels/cpu/gaussian_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ void GaussianInplaceKernel(const Context& dev_ctx,
int seed,
DenseTensor* out) {
T* data = dev_ctx.template Alloc<T>(out);
std::normal_distribution<T> dist(mean, std);

int64_t size = out->numel();
std::shared_ptr<std::mt19937_64> engine;
Expand All @@ -59,9 +58,7 @@ void GaussianInplaceKernel(const Context& dev_ctx,
engine = dev_ctx.GetGenerator()->GetCPUEngine();
}

for (int64_t i = 0; i < size; ++i) {
data[i] = dist(*engine);
}
NormalDistribution<T>(data, size, mean, std, engine);
}

} // namespace phi
Expand All @@ -73,11 +70,15 @@ PD_REGISTER_KERNEL(gaussian,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double) {}
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

// Registers phi::GaussianInplaceKernel as the CPU gaussian_inplace kernel for
// float, double, complex<float> and complex<double> element types.
PD_REGISTER_KERNEL(gaussian_inplace,
                   CPU,
                   ALL_LAYOUT,
                   phi::GaussianInplaceKernel,
                   float,
                   double,
                   phi::dtype::complex<float>,
                   phi::dtype::complex<double>) {}
30 changes: 30 additions & 0 deletions paddle/phi/kernels/funcs/norm_distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,34 @@ inline void NormalDistribution(phi::dtype::bfloat16* data,
}
}

// complex<float> specialization.
//
// Writes `size` complex samples to `data`. For each element the real part is
// drawn first and the imaginary part second, both from one
// std::normal_distribution<float> with mean `mean` and standard deviation
// sqrt(std^2 / 2), using `*engine` as the random source.
template <>
inline void NormalDistribution(phi::dtype::complex<float>* data,
                               const int64_t& size,
                               const float& mean,
                               const float& std,
                               std::shared_ptr<std::mt19937_64> engine) {
  // Per-component standard deviation.
  const float component_std = std::sqrt(std::pow(std, 2) / 2);
  std::normal_distribution<float> sampler(mean, component_std);
  int64_t idx = 0;
  while (idx < size) {
    // Separate statements keep the draw order (real, then imaginary) fixed.
    const float re = sampler(*engine);
    const float im = sampler(*engine);
    data[idx] = phi::dtype::complex<float>(re, im);
    ++idx;
  }
}

// complex<double> specialization.
//
// Writes `size` complex samples to `data`. For each element the real part is
// drawn first and the imaginary part second, both from one
// std::normal_distribution<double> with mean `mean` and standard deviation
// sqrt(std^2 / 2), using `*engine` as the random source.
template <>
inline void NormalDistribution(phi::dtype::complex<double>* data,
                               const int64_t& size,
                               const float& mean,
                               const float& std,
                               std::shared_ptr<std::mt19937_64> engine) {
  // Per-component standard deviation.
  // NOTE(review): held in a float, so it is rounded to single precision
  // before being handed to the double distribution — confirm this is intended.
  const float component_std = std::sqrt(std::pow(std, 2) / 2);
  std::normal_distribution<double> sampler(mean, component_std);
  int64_t idx = 0;
  while (idx < size) {
    // Separate statements keep the draw order (real, then imaginary) fixed.
    const double re = sampler(*engine);
    const double im = sampler(*engine);
    data[idx] = phi::dtype::complex<double>(re, im);
    ++idx;
  }
}

} // namespace phi
4 changes: 3 additions & 1 deletion paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ PD_REGISTER_KERNEL(gaussian_inplace_grad,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
161 changes: 155 additions & 6 deletions paddle/phi/kernels/gpu/gaussian_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,19 @@

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/generator.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/complex_kernel.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/index_impl.cu.h"

namespace phi {

// Shorthand for phi::dtype::complex<T>; used below to specialize
// GaussianGenerator for complex element types.
template <typename T>
using ComplexType = phi::dtype::complex<T>;

template <typename T>
struct GaussianGenerator {
T mean_, std_;
Expand All @@ -51,8 +56,41 @@ struct GaussianGenerator {
}
};

template <typename T, typename Context>
void GaussianKernel(const Context& dev_ctx,
// Index-based Gaussian sampler for complex element types.
//
// Calling the functor with element index n returns a ComplexType<T> whose
// real and imaginary parts are two consecutive draws from
// thrust::normal_distribution<T>(mean_, std_). The engine is seeded with
// seed_ and advanced according to n + offset_, so the result depends only on
// (seed_, offset_, n).
template <typename T>
struct GaussianGenerator<ComplexType<T>> {
  T mean_, std_;
  unsigned int seed_;
  unsigned int offset_ = 0;

  __host__ __device__ GaussianGenerator(T mean, T std, int seed)
      : mean_(mean), std_(std), seed_(seed) {}

  __host__ __device__ GaussianGenerator(T mean, T std, int seed, int offset)
      : mean_(mean), std_(std), seed_(seed), offset_(offset) {}

  __host__ __device__ ComplexType<T> operator()(const unsigned int n) const {
    thrust::minstd_rand rng;
    rng.seed(seed_);
    thrust::normal_distribution<T> dist(mean_, std_);
    const unsigned int new_n = n + offset_;
    // Use ONE engine and give every element its own pair of engine states
    // (2 * new_n and 2 * new_n + 1). Two engines seeded and discarded
    // identically would feed both draws from the same state, so the real and
    // imaginary parts would not be independent samples.
    // The widening cast keeps 2 * new_n from wrapping in 32-bit arithmetic.
    // NOTE(review): assumes the two draws below consume no more than two
    // engine outputs in total — confirm against the Thrust version in use.
    rng.discard(static_cast<unsigned long long>(new_n) * 2);
    const T real = dist(rng);
    const T imag = dist(rng);
    return ComplexType<T>(real, imag);
  }
};

// If T is not complex
template <
typename T,
typename Context,
std::enable_if_t<!std::is_same<T, phi::dtype::complex<float>>::value &&
!std::is_same<T, phi::dtype::complex<double>>::value,
bool> = true>
void GaussianRandom(const Context& dev_ctx,
const IntArray& shape,
float mean,
float std,
Expand All @@ -76,8 +114,55 @@ void GaussianKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
// If T is complex
template <
typename T,
typename Context,
std::enable_if_t<std::is_same<T, phi::dtype::complex<float>>::value ||
std::is_same<T, phi::dtype::complex<double>>::value,
bool> = true>
void GaussianRandom(const Context& dev_ctx,
const IntArray& shape,
float mean,
float std,
int seed,
DataType dtype,
DenseTensor* out) {
out->Resize(common::make_ddim(shape.GetData()));
dev_ctx.template Alloc<T>(out);
float std_of_real_or_imag = std::sqrt(std::pow(std, 2) / 2);
if (seed == 0) {
// use global Generator seed
DenseTensor* out_real = new DenseTensor();
DenseTensor* out_imag = new DenseTensor();
out_real->Resize(common::make_ddim(shape.GetData()));
out_imag->Resize(common::make_ddim(shape.GetData()));
dev_ctx.template Alloc<T>(out_real);
dev_ctx.template Alloc<T>(out_imag);
funcs::normal_distribution<phi::dtype::Real<T>> dist;
funcs::normal_distribution<phi::dtype::Real<T>> dist_imag;
funcs::normal_transform<phi::dtype::Real<T>> trans(mean,
std_of_real_or_imag);
funcs::distribution_and_transform<phi::dtype::Real<T>>(
dev_ctx, out_real, dist, trans);
funcs::distribution_and_transform<phi::dtype::Real<T>>(
dev_ctx, out_imag, dist_imag, trans);
phi::ComplexKernel<phi::dtype::Real<T>>(dev_ctx, *out_real, *out_imag, out);
} else {
// use OP seed
auto func = GaussianGenerator<T>(mean, std_of_real_or_imag, seed);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
}
}

// If T is not complex
template <
typename T,
typename Context,
std::enable_if_t<!std::is_same<T, phi::dtype::complex<float>>::value &&
!std::is_same<T, phi::dtype::complex<double>>::value,
bool> = true>
void GaussianRandomInplace(const Context& dev_ctx,
const DenseTensor& x,
float mean,
float std,
Expand All @@ -99,6 +184,66 @@ void GaussianInplaceKernel(const Context& dev_ctx,
}
}

// If T is complex
template <
typename T,
typename Context,
std::enable_if_t<std::is_same<T, phi::dtype::complex<float>>::value ||
std::is_same<T, phi::dtype::complex<double>>::value,
bool> = true>
void GaussianRandomInplace(const Context& dev_ctx,
const DenseTensor& x,
float mean,
float std,
int seed,
DenseTensor* out) {
dev_ctx.template Alloc<T>(out);
float std_of_real_or_imag = std::sqrt(std::pow(std, 2) / 2);
if (seed == 0) {
// use global Generator seed
DenseTensor* out_real = new DenseTensor();
DenseTensor* out_imag = new DenseTensor();
out_real->Resize(x.dims());
out_imag->Resize(x.dims());
dev_ctx.template Alloc<T>(out_real);
dev_ctx.template Alloc<T>(out_imag);
funcs::normal_distribution<phi::dtype::Real<T>> dist;
funcs::normal_distribution<phi::dtype::Real<T>> dist_imag;
funcs::normal_transform<phi::dtype::Real<T>> trans(mean,
std_of_real_or_imag);
funcs::distribution_and_transform<phi::dtype::Real<T>>(
dev_ctx, out_real, dist, trans);
funcs::distribution_and_transform<phi::dtype::Real<T>>(
dev_ctx, out_imag, dist_imag, trans);
phi::ComplexKernel<phi::dtype::Real<T>>(dev_ctx, *out_real, *out_imag, out);
} else {
// use OP seed
auto func = GaussianGenerator<T>(mean, std_of_real_or_imag, seed);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
}
}

template <typename T, typename Context>
void GaussianKernel(const Context& dev_ctx,
const IntArray& shape,
float mean,
float std,
int seed,
DataType dtype,
DenseTensor* out) {
GaussianRandom<T>(dev_ctx, shape, mean, std, seed, dtype, out);
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
const DenseTensor& x,
float mean,
float std,
int seed,
DenseTensor* out) {
GaussianRandomInplace<T>(dev_ctx, x, mean, std, seed, out);
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian,
Expand All @@ -108,7 +253,9 @@ PD_REGISTER_KERNEL(gaussian,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double) {}
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(gaussian_inplace,
GPU,
Expand All @@ -117,4 +264,6 @@ PD_REGISTER_KERNEL(gaussian_inplace,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double) {}
double,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
Loading