Commit f656e06

Rename ctx to dev_ctx in paddle/phi/kernels/fusion/xpu/ directory (#73161)
* Fix
* Fix
* Fix
1 parent 06b6078 commit f656e06

26 files changed: 177 additions & 173 deletions
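The change is mechanical: in each fused XPU kernel under paddle/phi/kernels/fusion/xpu/, the device-context parameter is renamed from ctx to dev_ctx, and every use of it (template Alloc<...>, HostAlloc<...>, x_context(), xpu::ctx_guard) is updated to match. A minimal sketch of the pattern, using a hypothetical FooXPUKernel that is not part of this commit:

// Hypothetical kernel, for illustration only: shows the rename pattern
// applied throughout this commit. The PHI device context argument is now
// named dev_ctx; the underlying xdnn context is still obtained via
// x_context().
template <typename T, typename Context>
void FooXPUKernel(const Context& dev_ctx,  // was: const Context& ctx
                  const DenseTensor& x,
                  DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  // Output allocation now goes through dev_ctx (formerly ctx).
  auto* out_data = reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(out));
  int r = xpu::copy(dev_ctx.x_context(),  // formerly ctx.x_context()
                    reinterpret_cast<const XPUType*>(x.data<T>()),
                    out_data,
                    x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "foo_xpu");
}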

paddle/phi/kernels/fusion/xpu/add_act_xpu_kernel.cc

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ namespace phi {
 namespace fusion {

 template <typename T, typename Context>
-void AddActXPUKernel(const Context& ctx,
+void AddActXPUKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const paddle::optional<DenseTensor>& x_max,
                      const DenseTensor& y,
@@ -35,22 +35,22 @@ void AddActXPUKernel(const Context& ctx,
   auto* y_data = reinterpret_cast<const XPUType*>(y.data<T>());
   const float* y_max_data =
       y_max.get_ptr() == nullptr ? nullptr : y_max.get_ptr()->data<float>();
-  auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
+  auto* out_data = reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(out));

   std::vector<int64_t> x_shape = common::vectorize(x.dims());
   std::vector<int64_t> y_shape = common::vectorize(y.dims());
   xpu::Activation_t act(static_cast<xpu::Activation_t::act_enum>(act_type));
   int r =
       xpu::add_activation_fusion<XPUType, XPUType, XPUType>(  // TX/TY/TZ/TID
-          /* baidu::xpu::api::Context* ctx */ ctx.x_context(),
+          /* baidu::xpu::api::Context* ctx */ dev_ctx.x_context(),
           /* const TX* x */ x_data,
           /* const TY* y */ y_data,
           /* TZ* z */ out_data,
           /* const std::vector<int64_t>& x_shape */ x_shape,
           /* const std::vector<int64_t>& y_shape */ y_shape,
           /* const float* max_x */ x_max_data,
           /* const float* max_y */ y_max_data,
-          /* float* max_z */ ctx.template Alloc<float>(out_max),
+          /* float* max_z */ dev_ctx.template Alloc<float>(out_max),
           /* const baidu::xpu::api::Activation_t& act */ act);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "add_act_xpu");
 }

paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc

Lines changed: 3 additions & 3 deletions
@@ -66,7 +66,7 @@ static phi::DDim BroadCastInferShape(const DDim x_dims,
 }

 template <typename T, typename Context>
-void AddLayernormXPUKernel(const Context& ctx,
+void AddLayernormXPUKernel(const Context& dev_ctx,
                            const DenseTensor& x,
                            const DenseTensor& y,
                            const DenseTensor& scale,
@@ -88,10 +88,10 @@ void AddLayernormXPUKernel(const Context& ctx,
   int64_t m = layer_norm_x_mat_dims[0];
   int64_t n = layer_norm_x_mat_dims[1];

-  auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
+  auto* out_data = reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(out));

   int r = xpu::add_layer_norm_fusion<XPUType>(  // T
-      /* baidu::xpu::api::Context* ctx */ ctx.x_context(),
+      /* baidu::xpu::api::Context* ctx */ dev_ctx.x_context(),
       /* const T* x */ x_data,
       /* const T* y */ y_data,
       /* T* z */ out_data,

paddle/phi/kernels/fusion/xpu/addcmul_xpu_kernel.cc

Lines changed: 4 additions & 4 deletions
@@ -19,7 +19,7 @@ namespace phi {
 namespace fusion {

 template <typename T, typename Context>
-void AddCMulXPUKernel(const Context& ctx,
+void AddCMulXPUKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       const DenseTensor& w,
@@ -29,18 +29,18 @@ void AddCMulXPUKernel(const Context& ctx,
   const auto* y_data = y.data<T>();
   const auto* w_data = w.data<T>();

-  auto* out_data = ctx.template Alloc<T>(out);
+  auto* out_data = dev_ctx.template Alloc<T>(out);

 #ifdef PADDLE_WITH_XPU_PLUGIN
-  int r = xpu::plugin::fast_addcmul(ctx.x_context(),
+  int r = xpu::plugin::fast_addcmul(dev_ctx.x_context(),
                                     reinterpret_cast<const XPUType*>(w_data),
                                     reinterpret_cast<const XPUType*>(x_data),
                                     reinterpret_cast<const XPUType*>(y_data),
                                     reinterpret_cast<XPUType*>(out_data),
                                     x.numel());
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_addcmul");
 #else
-  int r = xpu::addcmul(ctx.x_context(),
+  int r = xpu::addcmul(dev_ctx.x_context(),
                        reinterpret_cast<const XPUType*>(w_data),
                        reinterpret_cast<const XPUType*>(x_data),
                        reinterpret_cast<const XPUType*>(y_data),

paddle/phi/kernels/fusion/xpu/conv1d_xpu_kernel.cc

Lines changed: 4 additions & 4 deletions
@@ -23,7 +23,7 @@ namespace phi {
 namespace fusion {

 template <typename T, typename Context>
-void Conv1dXPUKernel(const Context& ctx,
+void Conv1dXPUKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const paddle::optional<DenseTensor>& x_max,
                      const DenseTensor& filter,
@@ -65,8 +65,8 @@ void Conv1dXPUKernel(const Context& ctx,
           : branch_max.get_ptr()->data<float>();
   const float* bias_data =
       bias.get_ptr() == nullptr ? nullptr : bias.get_ptr()->data<float>();
-  auto* out_data = reinterpret_cast<XPUType*>(ctx.template Alloc<T>(out));
-  auto* out_max_data = ctx.template Alloc<float>(out_max);
+  auto* out_data = reinterpret_cast<XPUType*>(dev_ctx.template Alloc<T>(out));
+  auto* out_max_data = dev_ctx.template Alloc<float>(out_max);

   xpu::Activation_t act(static_cast<xpu::Activation_t::act_enum>(act_type));
   if (act_type == xpu::Activation_t::LEAKY_RELU) {
@@ -76,7 +76,7 @@ void Conv1dXPUKernel(const Context& ctx,
   }
   int r =
       xpu::conv1d_fusion<XPUType, int16_t, XPUType, int16_t>(  // TX/TW/TY/TGEMM
-          /* baidu::xpu::api::Context* ctx */ ctx.x_context(),
+          /* baidu::xpu::api::Context* ctx */ dev_ctx.x_context(),
           /* const TX* x */ input_data,
           /* const TW* weight */ filter_data,
           /* TY* y */ out_data,

paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc

Lines changed: 8 additions & 8 deletions
@@ -27,7 +27,7 @@ template <typename T_X,
           typename T_OUT,
           typename T_GEMM,
           typename Context>
-void Conv2dXPUKernelImpl(const Context& ctx,
+void Conv2dXPUKernelImpl(const Context& dev_ctx,
                          const DenseTensor& x,
                          const paddle::optional<DenseTensor>& x_max,
                          const DenseTensor& filter,
@@ -83,7 +83,7 @@ void Conv2dXPUKernelImpl(const Context& ctx,
           ? nullptr
           : branch_max.get_ptr()->data<float>();
   auto* branch_tensor = branch.get_ptr();
-  xpu::ctx_guard RAII_GUARD(ctx.x_context());
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
   if (branch_tensor != nullptr) {
     if (branch_tensor->dtype() == out->dtype()) {
       branch_data =
@@ -92,7 +92,7 @@ void Conv2dXPUKernelImpl(const Context& ctx,
       auto branch_data_temp =
           RAII_GUARD.alloc_l3_or_gm<XPUTypeOut>(branch_tensor->numel());
       int r = xpu::cast<XPUTypeX, XPUTypeOut>(
-          ctx.x_context(),
+          dev_ctx.x_context(),
           reinterpret_cast<const XPUTypeX*>(branch_tensor->data<T_X>()),
           branch_data_temp,
           branch_tensor->numel());
@@ -104,8 +104,8 @@ void Conv2dXPUKernelImpl(const Context& ctx,
   const float* bias_data =
       bias.get_ptr() == nullptr ? nullptr : bias.get_ptr()->data<float>();
   auto* out_data =
-      reinterpret_cast<XPUTypeOut*>(ctx.template Alloc<T_OUT>(out));
-  auto* out_max_data = ctx.template Alloc<float>(out_max);
+      reinterpret_cast<XPUTypeOut*>(dev_ctx.template Alloc<T_OUT>(out));
+  auto* out_max_data = dev_ctx.template Alloc<float>(out_max);
   out_max_data = out_max_in.get_ptr() != nullptr
                      ? const_cast<float*>(out_max_in.get_ptr()->data<float>())
                      : out_max_data;
@@ -118,7 +118,7 @@ void Conv2dXPUKernelImpl(const Context& ctx,

   int r = xpu::
       conv2d_fusion<XPUTypeX, XPUTypeW, XPUTypeOut, T_GEMM>(  // TX/TW/TY/TGEMM
-          /* baidu::xpu::api::Context* ctx */ ctx.x_context(),
+          /* baidu::xpu::api::Context* ctx */ dev_ctx.x_context(),
           /* const TX* input */ input_data,
           /* const TW* filter */ filter_data,
           /* TY* output */ out_data,
@@ -147,7 +147,7 @@ void Conv2dXPUKernelImpl(const Context& ctx,

 #define CONV2D_XPU_KERNEL_IMPL(x_dtype_, w_dtype_, out_dtype_, gemm_dtype_) \
   Conv2dXPUKernelImpl<x_dtype_, w_dtype_, out_dtype_, gemm_dtype_, Context>( \
-      ctx, \
+      dev_ctx, \
       x, \
       x_max, \
       filter, \
@@ -168,7 +168,7 @@ void Conv2dXPUKernelImpl(const Context& ctx,
       out_max);

 template <typename T, typename Context>
-void Conv2dXPUKernel(const Context& ctx,
+void Conv2dXPUKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const paddle::optional<DenseTensor>& x_max,
                      const DenseTensor& filter,

paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc

Lines changed: 4 additions & 4 deletions
@@ -20,7 +20,7 @@
 namespace phi {
 namespace fusion {
 template <typename T, typename Context>
-void Conv2dTransposeXPUKernel(const Context& ctx,
+void Conv2dTransposeXPUKernel(const Context& dev_ctx,
                               const DenseTensor& x,
                               const paddle::optional<DenseTensor>& x_max,
                               const DenseTensor& filter,
@@ -41,8 +41,8 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
                               DenseTensor* out_max) {
   using XPUType = typename XPUTypeTrait<T>::Type;

-  ctx.template Alloc<T>(out);
-  ctx.template Alloc<float>(out_max);
+  dev_ctx.template Alloc<T>(out);
+  dev_ctx.template Alloc<float>(out_max);
   bool is_nchw;
   is_nchw = (data_format == "NHWC") ? false : true;

@@ -73,7 +73,7 @@ void Conv2dTransposeXPUKernel(const Context& ctx,
   auto filter_max_data = filter_max.data<float>();

   int r = xpu::conv2d_transpose_fusion_v2<XPUType, int16_t, XPUType, int16_t>(
-      ctx.x_context(),
+      dev_ctx.x_context(),
       reinterpret_cast<const XPUType*>(x.data<T>()),
       filter.data<int16_t>(),
       reinterpret_cast<XPUType*>(out->data<T>()),

paddle/phi/kernels/fusion/xpu/cross_attention_xpu_kernel.cc

Lines changed: 10 additions & 10 deletions
@@ -27,7 +27,7 @@ template <typename T_X,
           typename T_GEMM,
           typename Context>
 void CrossAttentionXPUKernelImpl(
-    const Context& ctx,
+    const Context& dev_ctx,
     const DenseTensor& input_q,
     const DenseTensor& input_kv,
     const std::vector<const DenseTensor*>& fc_weight,
@@ -47,7 +47,7 @@ void CrossAttentionXPUKernelImpl(
   auto* input_q_data = reinterpret_cast<const XPUTypeX*>(input_q.data<T_X>());
   auto* input_kv_data = reinterpret_cast<const XPUTypeX*>(input_kv.data<T_X>());

-  xpu::ctx_guard RAII_GUARD(ctx.x_context());
+  xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());

   XPUTypeFP16* q_data = RAII_GUARD.alloc_l3_or_gm<XPUTypeFP16>(input_q.numel());
   XPUTypeFP16* k_data =
@@ -85,7 +85,7 @@ void CrossAttentionXPUKernelImpl(
   for (int i = 0; i < 3; ++i) {
     int r = xpu::
         fc_fusion<XPUTypeX, XPUTypeW, XPUTypeFP16, T_GEMM>(  // TX/TW/TY/TGEMM
-            ctx.x_context(),  // ctx
+            dev_ctx.x_context(),  // ctx
             loop_x[i],  // x
             fc_weight_data_int16_t[i],  // w
             loop_y[i],  // y
@@ -111,8 +111,8 @@ void CrossAttentionXPUKernelImpl(
   int mask_dim_size = mask_dim.size();
   const float* mask_data = mask.data<float>();
   auto* qkv_data =
-      reinterpret_cast<XPUTypeOut*>(ctx.template Alloc<T_QKV>(qkv));
-  auto* qkv_max_data = ctx.template Alloc<float>(qkv_max);
+      reinterpret_cast<XPUTypeOut*>(dev_ctx.template Alloc<T_QKV>(qkv));
+  auto* qkv_max_data = dev_ctx.template Alloc<float>(qkv_max);
   std::vector<int64_t> z_shape(4, 1);
   if (mask_dim_size < 4) {
     int index = 4 - mask_dim_size;
@@ -145,7 +145,7 @@ void CrossAttentionXPUKernelImpl(
                            XPUTypeFP16,
                            XPUTypeFP16,
                            XPUTypeFP16,
-                           XPUTypeGEMM>(ctx.x_context(),
+                           XPUTypeGEMM>(dev_ctx.x_context(),
                                         q_data,
                                         k_data,
                                         v_data,
@@ -160,13 +160,13 @@ void CrossAttentionXPUKernelImpl(

   if (input_q.dtype() == DataType::FLOAT32) {
     int r_cast_out = xpu::cast<XPUTypeFP16, XPUTypeOut>(
-        ctx.x_context(), qkv_temp_data, qkv_data, qkv->numel());
+        dev_ctx.x_context(), qkv_temp_data, qkv_data, qkv->numel());
     PADDLE_ENFORCE_XDNN_SUCCESS(
         r_cast_out, "cross_attention_xpu(cast out from fp16 to fp32)");
   }
   if (input_q.dtype() == DataType::FLOAT16) {
     int r_copy =
-        xpu::copy(ctx.x_context(), qkv_temp_data, qkv_data, qkv->numel());
+        xpu::copy(dev_ctx.x_context(), qkv_temp_data, qkv_data, qkv->numel());
     PADDLE_ENFORCE_XDNN_SUCCESS(r_copy, "cross_attention_xpu(copy out)");
   }
 }
@@ -177,7 +177,7 @@ void CrossAttentionXPUKernelImpl(
       w_dtype_, \
       qkv_dtype_, \
       gemm_dtype_, \
-      Context>(ctx, \
+      Context>(dev_ctx, \
                input_q, \
                input_kv, \
                fc_weight, \
@@ -193,7 +193,7 @@ void CrossAttentionXPUKernelImpl(

 template <typename T, typename Context>
 void CrossAttentionXPUKernel(
-    const Context& ctx,
+    const Context& dev_ctx,
     const DenseTensor& input_q,
     const DenseTensor& input_kv,
     const std::vector<const DenseTensor*>& fc_weight,

paddle/phi/kernels/fusion/xpu/embedding_with_eltwise_add_xpu_kernel.cc

Lines changed: 8 additions & 8 deletions
@@ -59,7 +59,7 @@ void FillSeqLod<float>(int batch_size,
 }

 template <typename TT, typename TID, typename Context>
-void MultiEmbeddingKernel(const Context& ctx,
+void MultiEmbeddingKernel(const Context& dev_ctx,
                           const std::vector<const DenseTensor*>& ids,
                           const std::vector<const DenseTensor*>& tables,
                           const paddle::optional<DenseTensor>& mask,
@@ -110,9 +110,9 @@ void MultiEmbeddingKernel(const Context& ctx,
     int batch_size = mask_tensor->dims()[0];
     auto pad_seq_len = mask_tensor->dims()[1];
     max_seq_len->Resize({1});
-    ctx.template HostAlloc<int>(max_seq_len)[0] = pad_seq_len;
+    dev_ctx.template HostAlloc<int>(max_seq_len)[0] = pad_seq_len;
     seq_lod->Resize({batch_size + 1});
-    int* seq_lod_data = ctx.template HostAlloc<int>(seq_lod);
+    int* seq_lod_data = dev_ctx.template HostAlloc<int>(seq_lod);

     std::vector<int> cpu_seq_lod{0};
     switch (mask_tensor->dtype()) {
@@ -160,9 +160,9 @@ void MultiEmbeddingKernel(const Context& ctx,
   }

   int r = xpu::multi_embedding_fusion<XPUType, XPUType, TID>(
-      ctx.x_context(),
+      dev_ctx.x_context(),
       arg_tables,
-      reinterpret_cast<XPUType*>(ctx.template Alloc<TT>(out)),
+      reinterpret_cast<XPUType*>(dev_ctx.template Alloc<TT>(out)),
       arg_ids,
       table_lens,
       emb_dim,
@@ -174,7 +174,7 @@ void MultiEmbeddingKernel(const Context& ctx,

 template <typename T, typename Context>
 void EmbeddingWithEltwiseAddXpuKernel(
-    const Context& ctx,
+    const Context& dev_ctx,
     const std::vector<const DenseTensor*>& ids,
     const std::vector<const DenseTensor*>& tables,
     const paddle::optional<DenseTensor>& mask,
@@ -185,11 +185,11 @@ void EmbeddingWithEltwiseAddXpuKernel(
   switch (ids[0]->dtype()) {
     case DataType::INT32:
       MultiEmbeddingKernel<T, int, Context>(
-          ctx, ids, tables, mask, padding_idx, out, seq_lod, max_seq_len);
+          dev_ctx, ids, tables, mask, padding_idx, out, seq_lod, max_seq_len);
       break;
     case DataType::INT64:
       MultiEmbeddingKernel<T, int64_t, Context>(
-          ctx, ids, tables, mask, padding_idx, out, seq_lod, max_seq_len);
+          dev_ctx, ids, tables, mask, padding_idx, out, seq_lod, max_seq_len);
       break;
     default:
       PADDLE_THROW(common::errors::Unimplemented(
