Skip to content

Commit 1cf9f0d

Browse files
committed
Merge remote-tracking branch 'origin/layer_norm' into layer_norm
2 parents: 1c49e6c + 159ff89; commit: 1cf9f0d

File tree

1 file changed

+43
-61
lines changed

1 file changed

+43
-61
lines changed

src/layer/vulkan/layernorm_vulkan.cpp

Lines changed: 43 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -43,100 +43,98 @@ int LayerNorm_vulkan::create_pipeline(const Option& opt)
4343
{
4444
{
4545
pipeline_layernorm_reduce_sum4_fp16_to_fp32 = new Pipeline(vkdev);
46-
pipeline_layernorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz(16,4,1);
46+
pipeline_layernorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz(16, 4, 1);
4747
pipeline_layernorm_reduce_sum4_fp16_to_fp32->create(LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32, opt, std::vector<vk_specialization_type>());
4848

4949
pipeline_layernorm_reduce_sum4_fp32[0] = new Pipeline(vkdev);
50-
pipeline_layernorm_reduce_sum4_fp32[0]->set_optimal_local_size_xyz(8,8,1);
50+
pipeline_layernorm_reduce_sum4_fp32[0]->set_optimal_local_size_xyz(8, 8, 1);
5151
pipeline_layernorm_reduce_sum4_fp32[0]->create(LayerShaderType::layernorm_reduce_sum4_fp32, opt, std::vector<vk_specialization_type>());
5252
pipeline_layernorm_reduce_sum4_fp32[1] = new Pipeline(vkdev);
53-
pipeline_layernorm_reduce_sum4_fp32[1]->set_optimal_local_size_xyz(8,8,1);
53+
pipeline_layernorm_reduce_sum4_fp32[1]->set_optimal_local_size_xyz(8, 8, 1);
5454
pipeline_layernorm_reduce_sum4_fp32[1]->create(LayerShaderType::layernorm_reduce_sum4_fp32, opt, std::vector<vk_specialization_type>());
5555

5656
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 = new Pipeline(vkdev);
57-
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz(16,4,1);
57+
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz(16, 4, 1);
5858
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->create(LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32_pack4, opt, std::vector<vk_specialization_type>());
5959

6060
pipeline_layernorm_reduce_sum4_fp32_pack4[0] = new Pipeline(vkdev);
61-
pipeline_layernorm_reduce_sum4_fp32_pack4[0]->set_optimal_local_size_xyz(8,8,1);
62-
pipeline_layernorm_reduce_sum4_fp32_pack4[0]->create(LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
61+
pipeline_layernorm_reduce_sum4_fp32_pack4[0]->set_optimal_local_size_xyz(8, 8, 1);
62+
pipeline_layernorm_reduce_sum4_fp32_pack4[0]->create(LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
6363
pipeline_layernorm_reduce_sum4_fp32_pack4[1] = new Pipeline(vkdev);
64-
pipeline_layernorm_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(8,8,1);
64+
pipeline_layernorm_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(8, 8, 1);
6565
pipeline_layernorm_reduce_sum4_fp32_pack4[1]->create(LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
6666

6767
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 = new Pipeline(vkdev);
68-
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz(16,4,1);
68+
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz(16, 4, 1);
6969
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->create(LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32_pack8, opt, std::vector<vk_specialization_type>());
7070

7171
pipeline_layernorm_reduce_sum4_fp32_pack8[0] = new Pipeline(vkdev);
72-
pipeline_layernorm_reduce_sum4_fp32_pack8[0]->set_optimal_local_size_xyz(8,8,1);
72+
pipeline_layernorm_reduce_sum4_fp32_pack8[0]->set_optimal_local_size_xyz(8, 8, 1);
7373
pipeline_layernorm_reduce_sum4_fp32_pack8[0]->create(LayerShaderType::layernorm_reduce_sum4_fp32_pack8, opt, std::vector<vk_specialization_type>());
7474
pipeline_layernorm_reduce_sum4_fp32_pack8[1] = new Pipeline(vkdev);
75-
pipeline_layernorm_reduce_sum4_fp32_pack8[1]->set_optimal_local_size_xyz(8,8,1);
75+
pipeline_layernorm_reduce_sum4_fp32_pack8[1]->set_optimal_local_size_xyz(8, 8, 1);
7676
pipeline_layernorm_reduce_sum4_fp32_pack8[1]->create(LayerShaderType::layernorm_reduce_sum4_fp32_pack8, opt, std::vector<vk_specialization_type>());
7777
}
7878

7979
{
8080
pipeline_layernorm_reduce_mean = new Pipeline(vkdev);
81-
pipeline_layernorm_reduce_mean->set_optimal_local_size_xyz(1,8,8);
81+
pipeline_layernorm_reduce_mean->set_optimal_local_size_xyz(1, 8, 8);
8282
pipeline_layernorm_reduce_mean->create(LayerShaderType::layernorm_reduce_mean, opt, std::vector<vk_specialization_type>());
8383

8484
pipeline_layernorm_reduce_mean_pack4 = new Pipeline(vkdev);
85-
pipeline_layernorm_reduce_mean_pack4->set_optimal_local_size_xyz(1,8,8);
85+
pipeline_layernorm_reduce_mean_pack4->set_optimal_local_size_xyz(1, 8, 8);
8686
pipeline_layernorm_reduce_mean_pack4->create(LayerShaderType::layernorm_reduce_mean_pack4, opt, std::vector<vk_specialization_type>());
8787

8888
pipeline_layernorm_reduce_mean_pack8 = new Pipeline(vkdev);
89-
pipeline_layernorm_reduce_mean_pack8->set_optimal_local_size_xyz(1,8,8);
89+
pipeline_layernorm_reduce_mean_pack8->set_optimal_local_size_xyz(1, 8, 8);
9090
pipeline_layernorm_reduce_mean_pack8->create(LayerShaderType::layernorm_reduce_mean_pack8, opt, std::vector<vk_specialization_type>());
9191
}
9292

9393
{
9494
pipeline_layernorm_sub_mean_square = new Pipeline(vkdev);
95-
pipeline_layernorm_sub_mean_square->set_optimal_local_size_xyz(8,8,1);
95+
pipeline_layernorm_sub_mean_square->set_optimal_local_size_xyz(8, 8, 1);
9696
pipeline_layernorm_sub_mean_square->create(LayerShaderType::layernorm_sub_mean_square, opt, std::vector<vk_specialization_type>());
9797

9898
pipeline_layernorm_sub_mean_square_pack4 = new Pipeline(vkdev);
99-
pipeline_layernorm_sub_mean_square_pack4->set_optimal_local_size_xyz(8,8,1);
99+
pipeline_layernorm_sub_mean_square_pack4->set_optimal_local_size_xyz(8, 8, 1);
100100
pipeline_layernorm_sub_mean_square_pack4->create(LayerShaderType::layernorm_sub_mean_square_pack4, opt, std::vector<vk_specialization_type>());
101101

102102
pipeline_layernorm_sub_mean_square_pack8 = new Pipeline(vkdev);
103-
pipeline_layernorm_sub_mean_square_pack8->set_optimal_local_size_xyz(8,8,1);
103+
pipeline_layernorm_sub_mean_square_pack8->set_optimal_local_size_xyz(8, 8, 1);
104104
pipeline_layernorm_sub_mean_square_pack8->create(LayerShaderType::layernorm_sub_mean_square_pack8, opt, std::vector<vk_specialization_type>());
105105
}
106106

107-
108107
{
109108
std::vector<vk_specialization_type> specializations(1);
110109
specializations[0].f = eps;
111110

112-
pipeline_layernorm_coeffs = new Pipeline(vkdev);
113-
pipeline_layernorm_coeffs->set_optimal_local_size_xyz(8,8,1);
114-
pipeline_layernorm_coeffs->create(LayerShaderType::layernorm_coeffs, opt, specializations);
115-
116-
pipeline_layernorm_coeffs_pack4 = new Pipeline(vkdev);
117-
pipeline_layernorm_coeffs_pack4->set_optimal_local_size_xyz(8,8,1);
118-
pipeline_layernorm_coeffs_pack4->create(LayerShaderType::layernorm_coeffs_pack4, opt, specializations);
111+
pipeline_layernorm_coeffs = new Pipeline(vkdev);
112+
pipeline_layernorm_coeffs->set_optimal_local_size_xyz(8, 8, 1);
113+
pipeline_layernorm_coeffs->create(LayerShaderType::layernorm_coeffs, opt, specializations);
119114

120-
pipeline_layernorm_coeffs_pack8 = new Pipeline(vkdev);
121-
pipeline_layernorm_coeffs_pack8->set_optimal_local_size_xyz(8,8,1);
122-
pipeline_layernorm_coeffs_pack8->create(LayerShaderType::layernorm_coeffs_pack8, opt, specializations);
115+
pipeline_layernorm_coeffs_pack4 = new Pipeline(vkdev);
116+
pipeline_layernorm_coeffs_pack4->set_optimal_local_size_xyz(8, 8, 1);
117+
pipeline_layernorm_coeffs_pack4->create(LayerShaderType::layernorm_coeffs_pack4, opt, specializations);
123118

119+
pipeline_layernorm_coeffs_pack8 = new Pipeline(vkdev);
120+
pipeline_layernorm_coeffs_pack8->set_optimal_local_size_xyz(8, 8, 1);
121+
pipeline_layernorm_coeffs_pack8->create(LayerShaderType::layernorm_coeffs_pack8, opt, specializations);
124122
}
125123

126124
{
127125
std::vector<vk_specialization_type> specializations(1);
128126
specializations[0].i = affine;
129127

130128
pipeline_layernorm_norm = new Pipeline(vkdev);
131-
pipeline_layernorm_norm->set_optimal_local_size_xyz(8,8,1);
129+
pipeline_layernorm_norm->set_optimal_local_size_xyz(8, 8, 1);
132130
pipeline_layernorm_norm->create(LayerShaderType::layernorm_norm, opt, specializations);
133131

134132
pipeline_layernorm_norm_pack4 = new Pipeline(vkdev);
135-
pipeline_layernorm_norm_pack4->set_optimal_local_size_xyz(8,8,1);
133+
pipeline_layernorm_norm_pack4->set_optimal_local_size_xyz(8, 8, 1);
136134
pipeline_layernorm_norm_pack4->create(LayerShaderType::layernorm_norm_pack4, opt, specializations);
137135

138136
pipeline_layernorm_norm_pack8 = new Pipeline(vkdev);
139-
pipeline_layernorm_norm_pack8->set_optimal_local_size_xyz(8,8,1);
137+
pipeline_layernorm_norm_pack8->set_optimal_local_size_xyz(8, 8, 1);
140138
pipeline_layernorm_norm_pack8->create(LayerShaderType::layernorm_norm_pack8, opt, specializations);
141139
}
142140

@@ -238,24 +236,24 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
238236
int group_size;
239237
int num_groups_per_channel;
240238
if (dims == 1)
241-
{ // (w)
239+
{ // (w)
242240
group_size = w;
243241
num_groups_per_channel = 1;
244242
}
245243
else if (dims == 2)
246-
{ // (w, h)
244+
{ // (w, h)
247245
group_size = w;
248246
num_groups_per_channel = h;
249247
}
250248
else
251-
{ // dims == 3, (w, h, c)
249+
{ // dims == 3, (w, h, c)
252250
if (affine_size == w)
253251
{
254252
group_size = w;
255253
num_groups_per_channel = h;
256254
}
257255
else
258-
{ // affine_size == w * h, like InstanceNorm
256+
{ // affine_size == w * h, like InstanceNorm
259257
group_size = w * h;
260258
num_groups_per_channel = 1;
261259
}
@@ -292,10 +290,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
292290
dispatcher.c = channels;
293291

294292
const Pipeline* pipeline_reduce_sum4 = (elemsize / elempack == 2)
295-
? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4
296-
: pipeline_layernorm_reduce_sum4_fp16_to_fp32)
297-
: (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0]
298-
: pipeline_layernorm_reduce_sum4_fp32[0]);
293+
? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
294+
: (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0] : pipeline_layernorm_reduce_sum4_fp32[0]);
299295

300296
cmd.record_pipeline(pipeline_reduce_sum4, bindings, constants, dispatcher);
301297

@@ -323,12 +319,10 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
323319

324320
dispatcher.w = reduced_w;
325321

326-
const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2]
327-
: pipeline_layernorm_reduce_sum4_fp32[pb % 2];
322+
const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2] : pipeline_layernorm_reduce_sum4_fp32[pb % 2];
328323
cmd.record_pipeline(pipeline_reduce_iter, bindings_iter, constants_iter, dispatcher);
329324
pb++;
330325
sum_workspace = sum_workspace_reduced;
331-
332326
}
333327

334328
std::vector<VkMat> mean_bindings(2);
@@ -343,10 +337,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
343337
mean_constants[4].f = (float)group_size;
344338

345339
dispatcher.w = 1;
346-
const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4
347-
: pipeline_layernorm_reduce_mean;
340+
const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4 : pipeline_layernorm_reduce_mean;
348341
cmd.record_pipeline(pipeline_reduce_mean, mean_bindings, mean_constants, dispatcher);
349-
350342
}
351343

352344
{
@@ -364,8 +356,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
364356
sq_constants[3].i = cstep;
365357
sq_constants[4].i = affine_size;
366358

367-
const Pipeline* pipeline_sub_mean_square = elempack == 8 ? pipeline_layernorm_sub_mean_square_pack8 : elempack == 4 ? pipeline_layernorm_sub_mean_square_pack4
368-
: pipeline_layernorm_sub_mean_square;
359+
const Pipeline* pipeline_sub_mean_square = elempack == 8 ? pipeline_layernorm_sub_mean_square_pack8 : elempack == 4 ? pipeline_layernorm_sub_mean_square_pack4 : pipeline_layernorm_sub_mean_square;
369360
cmd.record_pipeline(pipeline_sub_mean_square, sq_bindings, sq_constants, square_workspace);
370361

371362
// Reduce sum of squares
@@ -393,14 +384,11 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
393384
dispatcher.c = channels;
394385

395386
const Pipeline* pipeline_reduce_sum4 = (square_workspace.elemsize / elempack == 2)
396-
? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4
397-
: pipeline_layernorm_reduce_sum4_fp16_to_fp32)
398-
: (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0]
399-
: pipeline_layernorm_reduce_sum4_fp32[0]);
387+
? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
388+
: (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0] : pipeline_layernorm_reduce_sum4_fp32[0]);
400389

401390
cmd.record_pipeline(pipeline_reduce_sum4, bindings, constants, dispatcher);
402391

403-
404392
int pb = 1;
405393
while (sqsum_workspace.w > 1)
406394
{
@@ -424,12 +412,10 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
424412

425413
dispatcher.w = reduced_w;
426414

427-
const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2]
428-
: pipeline_layernorm_reduce_sum4_fp32[pb % 2];
415+
const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2] : pipeline_layernorm_reduce_sum4_fp32[pb % 2];
429416
cmd.record_pipeline(pipeline_reduce_iter, bindings_iter, constants_iter, dispatcher);
430417
pb++;
431418
sqsum_workspace = sum_workspace_reduced;
432-
433419
}
434420

435421
std::vector<VkMat> var_bindings(2);
@@ -444,10 +430,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
444430

445431
dispatcher.w = 1;
446432

447-
const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4
448-
: pipeline_layernorm_reduce_mean;
433+
const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4 : pipeline_layernorm_reduce_mean;
449434
cmd.record_pipeline(pipeline_reduce_mean, var_bindings, var_constants, dispatcher);
450-
451435
}
452436

453437
// coeffs a and b ---
@@ -469,8 +453,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
469453
dispatcher_coeffs.h = num_groups_per_channel;
470454
dispatcher_coeffs.c = channels;
471455

472-
const Pipeline* pipeline_coeffs = elempack == 8 ? pipeline_layernorm_coeffs_pack8 : elempack == 4 ? pipeline_layernorm_coeffs_pack4
473-
: pipeline_layernorm_coeffs;
456+
const Pipeline* pipeline_coeffs = elempack == 8 ? pipeline_layernorm_coeffs_pack8 : elempack == 4 ? pipeline_layernorm_coeffs_pack4 : pipeline_layernorm_coeffs;
474457
cmd.record_pipeline(pipeline_coeffs, coeff_bindings, coeff_constants, dispatcher_coeffs);
475458

476459
// apply norm
@@ -487,8 +470,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
487470
norm_constants[3].i = cstep;
488471
norm_constants[4].i = affine_size;
489472

490-
const Pipeline* pipeline_norm = elempack == 8 ? pipeline_layernorm_norm_pack8 : elempack == 4 ? pipeline_layernorm_norm_pack4
491-
: pipeline_layernorm_norm;
473+
const Pipeline* pipeline_norm = elempack == 8 ? pipeline_layernorm_norm_pack8 : elempack == 4 ? pipeline_layernorm_norm_pack4 : pipeline_layernorm_norm;
492474
cmd.record_pipeline(pipeline_norm, norm_bindings, norm_constants, bottom_top_blob);
493475

494476
if (bottom_top_blob.dims == 1 && old_elempack != 0 && old_elempack != bottom_top_blob.elempack) // dim 1 is forbidden for pack

0 commit comments

Comments
 (0)