@@ -43,100 +43,98 @@ int LayerNorm_vulkan::create_pipeline(const Option& opt)
43
43
{
44
44
{
45
45
pipeline_layernorm_reduce_sum4_fp16_to_fp32 = new Pipeline (vkdev);
46
- pipeline_layernorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz (16 ,4 , 1 );
46
+ pipeline_layernorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz (16 , 4 , 1 );
47
47
pipeline_layernorm_reduce_sum4_fp16_to_fp32->create (LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32, opt, std::vector<vk_specialization_type>());
48
48
49
49
pipeline_layernorm_reduce_sum4_fp32[0 ] = new Pipeline (vkdev);
50
- pipeline_layernorm_reduce_sum4_fp32[0 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
50
+ pipeline_layernorm_reduce_sum4_fp32[0 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
51
51
pipeline_layernorm_reduce_sum4_fp32[0 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32, opt, std::vector<vk_specialization_type>());
52
52
pipeline_layernorm_reduce_sum4_fp32[1 ] = new Pipeline (vkdev);
53
- pipeline_layernorm_reduce_sum4_fp32[1 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
53
+ pipeline_layernorm_reduce_sum4_fp32[1 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
54
54
pipeline_layernorm_reduce_sum4_fp32[1 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32, opt, std::vector<vk_specialization_type>());
55
55
56
56
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 = new Pipeline (vkdev);
57
- pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz (16 ,4 , 1 );
57
+ pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz (16 , 4 , 1 );
58
58
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4->create (LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32_pack4, opt, std::vector<vk_specialization_type>());
59
59
60
60
pipeline_layernorm_reduce_sum4_fp32_pack4[0 ] = new Pipeline (vkdev);
61
- pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
62
- pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
61
+ pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
62
+ pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
63
63
pipeline_layernorm_reduce_sum4_fp32_pack4[1 ] = new Pipeline (vkdev);
64
- pipeline_layernorm_reduce_sum4_fp32_pack4[1 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
64
+ pipeline_layernorm_reduce_sum4_fp32_pack4[1 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
65
65
pipeline_layernorm_reduce_sum4_fp32_pack4[1 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32_pack4, opt, std::vector<vk_specialization_type>());
66
66
67
67
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 = new Pipeline (vkdev);
68
- pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz (16 ,4 , 1 );
68
+ pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz (16 , 4 , 1 );
69
69
pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8->create (LayerShaderType::layernorm_reduce_sum4_fp16_to_fp32_pack8, opt, std::vector<vk_specialization_type>());
70
70
71
71
pipeline_layernorm_reduce_sum4_fp32_pack8[0 ] = new Pipeline (vkdev);
72
- pipeline_layernorm_reduce_sum4_fp32_pack8[0 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
72
+ pipeline_layernorm_reduce_sum4_fp32_pack8[0 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
73
73
pipeline_layernorm_reduce_sum4_fp32_pack8[0 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32_pack8, opt, std::vector<vk_specialization_type>());
74
74
pipeline_layernorm_reduce_sum4_fp32_pack8[1 ] = new Pipeline (vkdev);
75
- pipeline_layernorm_reduce_sum4_fp32_pack8[1 ]->set_optimal_local_size_xyz (8 ,8 , 1 );
75
+ pipeline_layernorm_reduce_sum4_fp32_pack8[1 ]->set_optimal_local_size_xyz (8 , 8 , 1 );
76
76
pipeline_layernorm_reduce_sum4_fp32_pack8[1 ]->create (LayerShaderType::layernorm_reduce_sum4_fp32_pack8, opt, std::vector<vk_specialization_type>());
77
77
}
78
78
79
79
{
80
80
pipeline_layernorm_reduce_mean = new Pipeline (vkdev);
81
- pipeline_layernorm_reduce_mean->set_optimal_local_size_xyz (1 ,8 , 8 );
81
+ pipeline_layernorm_reduce_mean->set_optimal_local_size_xyz (1 , 8 , 8 );
82
82
pipeline_layernorm_reduce_mean->create (LayerShaderType::layernorm_reduce_mean, opt, std::vector<vk_specialization_type>());
83
83
84
84
pipeline_layernorm_reduce_mean_pack4 = new Pipeline (vkdev);
85
- pipeline_layernorm_reduce_mean_pack4->set_optimal_local_size_xyz (1 ,8 , 8 );
85
+ pipeline_layernorm_reduce_mean_pack4->set_optimal_local_size_xyz (1 , 8 , 8 );
86
86
pipeline_layernorm_reduce_mean_pack4->create (LayerShaderType::layernorm_reduce_mean_pack4, opt, std::vector<vk_specialization_type>());
87
87
88
88
pipeline_layernorm_reduce_mean_pack8 = new Pipeline (vkdev);
89
- pipeline_layernorm_reduce_mean_pack8->set_optimal_local_size_xyz (1 ,8 , 8 );
89
+ pipeline_layernorm_reduce_mean_pack8->set_optimal_local_size_xyz (1 , 8 , 8 );
90
90
pipeline_layernorm_reduce_mean_pack8->create (LayerShaderType::layernorm_reduce_mean_pack8, opt, std::vector<vk_specialization_type>());
91
91
}
92
92
93
93
{
94
94
pipeline_layernorm_sub_mean_square = new Pipeline (vkdev);
95
- pipeline_layernorm_sub_mean_square->set_optimal_local_size_xyz (8 ,8 , 1 );
95
+ pipeline_layernorm_sub_mean_square->set_optimal_local_size_xyz (8 , 8 , 1 );
96
96
pipeline_layernorm_sub_mean_square->create (LayerShaderType::layernorm_sub_mean_square, opt, std::vector<vk_specialization_type>());
97
97
98
98
pipeline_layernorm_sub_mean_square_pack4 = new Pipeline (vkdev);
99
- pipeline_layernorm_sub_mean_square_pack4->set_optimal_local_size_xyz (8 ,8 , 1 );
99
+ pipeline_layernorm_sub_mean_square_pack4->set_optimal_local_size_xyz (8 , 8 , 1 );
100
100
pipeline_layernorm_sub_mean_square_pack4->create (LayerShaderType::layernorm_sub_mean_square_pack4, opt, std::vector<vk_specialization_type>());
101
101
102
102
pipeline_layernorm_sub_mean_square_pack8 = new Pipeline (vkdev);
103
- pipeline_layernorm_sub_mean_square_pack8->set_optimal_local_size_xyz (8 ,8 , 1 );
103
+ pipeline_layernorm_sub_mean_square_pack8->set_optimal_local_size_xyz (8 , 8 , 1 );
104
104
pipeline_layernorm_sub_mean_square_pack8->create (LayerShaderType::layernorm_sub_mean_square_pack8, opt, std::vector<vk_specialization_type>());
105
105
}
106
106
107
-
108
107
{
109
108
std::vector<vk_specialization_type> specializations (1 );
110
109
specializations[0 ].f = eps;
111
110
112
- pipeline_layernorm_coeffs = new Pipeline (vkdev);
113
- pipeline_layernorm_coeffs->set_optimal_local_size_xyz (8 ,8 ,1 );
114
- pipeline_layernorm_coeffs->create (LayerShaderType::layernorm_coeffs, opt, specializations);
115
-
116
- pipeline_layernorm_coeffs_pack4 = new Pipeline (vkdev);
117
- pipeline_layernorm_coeffs_pack4->set_optimal_local_size_xyz (8 ,8 ,1 );
118
- pipeline_layernorm_coeffs_pack4->create (LayerShaderType::layernorm_coeffs_pack4, opt, specializations);
111
+ pipeline_layernorm_coeffs = new Pipeline (vkdev);
112
+ pipeline_layernorm_coeffs->set_optimal_local_size_xyz (8 , 8 , 1 );
113
+ pipeline_layernorm_coeffs->create (LayerShaderType::layernorm_coeffs, opt, specializations);
119
114
120
- pipeline_layernorm_coeffs_pack8 = new Pipeline (vkdev);
121
- pipeline_layernorm_coeffs_pack8 ->set_optimal_local_size_xyz (8 ,8 , 1 );
122
- pipeline_layernorm_coeffs_pack8 ->create (LayerShaderType::layernorm_coeffs_pack8 , opt, specializations);
115
+ pipeline_layernorm_coeffs_pack4 = new Pipeline (vkdev);
116
+ pipeline_layernorm_coeffs_pack4 ->set_optimal_local_size_xyz (8 , 8 , 1 );
117
+ pipeline_layernorm_coeffs_pack4 ->create (LayerShaderType::layernorm_coeffs_pack4 , opt, specializations);
123
118
119
+ pipeline_layernorm_coeffs_pack8 = new Pipeline (vkdev);
120
+ pipeline_layernorm_coeffs_pack8->set_optimal_local_size_xyz (8 , 8 , 1 );
121
+ pipeline_layernorm_coeffs_pack8->create (LayerShaderType::layernorm_coeffs_pack8, opt, specializations);
124
122
}
125
123
126
124
{
127
125
std::vector<vk_specialization_type> specializations (1 );
128
126
specializations[0 ].i = affine;
129
127
130
128
pipeline_layernorm_norm = new Pipeline (vkdev);
131
- pipeline_layernorm_norm->set_optimal_local_size_xyz (8 ,8 , 1 );
129
+ pipeline_layernorm_norm->set_optimal_local_size_xyz (8 , 8 , 1 );
132
130
pipeline_layernorm_norm->create (LayerShaderType::layernorm_norm, opt, specializations);
133
131
134
132
pipeline_layernorm_norm_pack4 = new Pipeline (vkdev);
135
- pipeline_layernorm_norm_pack4->set_optimal_local_size_xyz (8 ,8 , 1 );
133
+ pipeline_layernorm_norm_pack4->set_optimal_local_size_xyz (8 , 8 , 1 );
136
134
pipeline_layernorm_norm_pack4->create (LayerShaderType::layernorm_norm_pack4, opt, specializations);
137
135
138
136
pipeline_layernorm_norm_pack8 = new Pipeline (vkdev);
139
- pipeline_layernorm_norm_pack8->set_optimal_local_size_xyz (8 ,8 , 1 );
137
+ pipeline_layernorm_norm_pack8->set_optimal_local_size_xyz (8 , 8 , 1 );
140
138
pipeline_layernorm_norm_pack8->create (LayerShaderType::layernorm_norm_pack8, opt, specializations);
141
139
}
142
140
@@ -238,24 +236,24 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
238
236
int group_size;
239
237
int num_groups_per_channel;
240
238
if (dims == 1 )
241
- { // (w)
239
+ { // (w)
242
240
group_size = w;
243
241
num_groups_per_channel = 1 ;
244
242
}
245
243
else if (dims == 2 )
246
- { // (w, h)
244
+ { // (w, h)
247
245
group_size = w;
248
246
num_groups_per_channel = h;
249
247
}
250
248
else
251
- { // dims == 3, (w, h, c)
249
+ { // dims == 3, (w, h, c)
252
250
if (affine_size == w)
253
251
{
254
252
group_size = w;
255
253
num_groups_per_channel = h;
256
254
}
257
255
else
258
- { // affine_size == w * h, like InstanceNorm
256
+ { // affine_size == w * h, like InstanceNorm
259
257
group_size = w * h;
260
258
num_groups_per_channel = 1 ;
261
259
}
@@ -292,10 +290,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
292
290
dispatcher.c = channels;
293
291
294
292
const Pipeline* pipeline_reduce_sum4 = (elemsize / elempack == 2 )
295
- ? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4
296
- : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
297
- : (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]
298
- : pipeline_layernorm_reduce_sum4_fp32[0 ]);
293
+ ? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
294
+ : (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0 ] : pipeline_layernorm_reduce_sum4_fp32[0 ]);
299
295
300
296
cmd.record_pipeline (pipeline_reduce_sum4, bindings, constants, dispatcher);
301
297
@@ -323,12 +319,10 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
323
319
324
320
dispatcher.w = reduced_w;
325
321
326
- const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2 ]
327
- : pipeline_layernorm_reduce_sum4_fp32[pb % 2 ];
322
+ const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2 ] : pipeline_layernorm_reduce_sum4_fp32[pb % 2 ];
328
323
cmd.record_pipeline (pipeline_reduce_iter, bindings_iter, constants_iter, dispatcher);
329
324
pb++;
330
325
sum_workspace = sum_workspace_reduced;
331
-
332
326
}
333
327
334
328
std::vector<VkMat> mean_bindings (2 );
@@ -343,10 +337,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
343
337
mean_constants[4 ].f = (float )group_size;
344
338
345
339
dispatcher.w = 1 ;
346
- const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4
347
- : pipeline_layernorm_reduce_mean;
340
+ const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4 : pipeline_layernorm_reduce_mean;
348
341
cmd.record_pipeline (pipeline_reduce_mean, mean_bindings, mean_constants, dispatcher);
349
-
350
342
}
351
343
352
344
{
@@ -364,8 +356,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
364
356
sq_constants[3 ].i = cstep;
365
357
sq_constants[4 ].i = affine_size;
366
358
367
- const Pipeline* pipeline_sub_mean_square = elempack == 8 ? pipeline_layernorm_sub_mean_square_pack8 : elempack == 4 ? pipeline_layernorm_sub_mean_square_pack4
368
- : pipeline_layernorm_sub_mean_square;
359
+ const Pipeline* pipeline_sub_mean_square = elempack == 8 ? pipeline_layernorm_sub_mean_square_pack8 : elempack == 4 ? pipeline_layernorm_sub_mean_square_pack4 : pipeline_layernorm_sub_mean_square;
369
360
cmd.record_pipeline (pipeline_sub_mean_square, sq_bindings, sq_constants, square_workspace);
370
361
371
362
// Reduce sum of squares
@@ -393,14 +384,11 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
393
384
dispatcher.c = channels;
394
385
395
386
const Pipeline* pipeline_reduce_sum4 = (square_workspace.elemsize / elempack == 2 )
396
- ? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4
397
- : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
398
- : (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0 ]
399
- : pipeline_layernorm_reduce_sum4_fp32[0 ]);
387
+ ? (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack8 : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp16_to_fp32_pack4 : pipeline_layernorm_reduce_sum4_fp16_to_fp32)
388
+ : (elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[0 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[0 ] : pipeline_layernorm_reduce_sum4_fp32[0 ]);
400
389
401
390
cmd.record_pipeline (pipeline_reduce_sum4, bindings, constants, dispatcher);
402
391
403
-
404
392
int pb = 1 ;
405
393
while (sqsum_workspace.w > 1 )
406
394
{
@@ -424,12 +412,10 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
424
412
425
413
dispatcher.w = reduced_w;
426
414
427
- const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2 ]
428
- : pipeline_layernorm_reduce_sum4_fp32[pb % 2 ];
415
+ const Pipeline* pipeline_reduce_iter = elempack == 8 ? pipeline_layernorm_reduce_sum4_fp32_pack8[pb % 2 ] : elempack == 4 ? pipeline_layernorm_reduce_sum4_fp32_pack4[pb % 2 ] : pipeline_layernorm_reduce_sum4_fp32[pb % 2 ];
429
416
cmd.record_pipeline (pipeline_reduce_iter, bindings_iter, constants_iter, dispatcher);
430
417
pb++;
431
418
sqsum_workspace = sum_workspace_reduced;
432
-
433
419
}
434
420
435
421
std::vector<VkMat> var_bindings (2 );
@@ -444,10 +430,8 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
444
430
445
431
dispatcher.w = 1 ;
446
432
447
- const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4
448
- : pipeline_layernorm_reduce_mean;
433
+ const Pipeline* pipeline_reduce_mean = elempack == 8 ? pipeline_layernorm_reduce_mean_pack8 : elempack == 4 ? pipeline_layernorm_reduce_mean_pack4 : pipeline_layernorm_reduce_mean;
449
434
cmd.record_pipeline (pipeline_reduce_mean, var_bindings, var_constants, dispatcher);
450
-
451
435
}
452
436
453
437
// coeffs a and b ---
@@ -469,8 +453,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
469
453
dispatcher_coeffs.h = num_groups_per_channel;
470
454
dispatcher_coeffs.c = channels;
471
455
472
- const Pipeline* pipeline_coeffs = elempack == 8 ? pipeline_layernorm_coeffs_pack8 : elempack == 4 ? pipeline_layernorm_coeffs_pack4
473
- : pipeline_layernorm_coeffs;
456
+ const Pipeline* pipeline_coeffs = elempack == 8 ? pipeline_layernorm_coeffs_pack8 : elempack == 4 ? pipeline_layernorm_coeffs_pack4 : pipeline_layernorm_coeffs;
474
457
cmd.record_pipeline (pipeline_coeffs, coeff_bindings, coeff_constants, dispatcher_coeffs);
475
458
476
459
// apply norm
@@ -487,8 +470,7 @@ int LayerNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
487
470
norm_constants[3 ].i = cstep;
488
471
norm_constants[4 ].i = affine_size;
489
472
490
- const Pipeline* pipeline_norm = elempack == 8 ? pipeline_layernorm_norm_pack8 : elempack == 4 ? pipeline_layernorm_norm_pack4
491
- : pipeline_layernorm_norm;
473
+ const Pipeline* pipeline_norm = elempack == 8 ? pipeline_layernorm_norm_pack8 : elempack == 4 ? pipeline_layernorm_norm_pack4 : pipeline_layernorm_norm;
492
474
cmd.record_pipeline (pipeline_norm, norm_bindings, norm_constants, bottom_top_blob);
493
475
494
476
if (bottom_top_blob.dims == 1 && old_elempack != 0 && old_elempack != bottom_top_blob.elempack ) // dim 1 is forbidden for pack
0 commit comments