@@ -1334,7 +1334,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
1334
1334
uint32_t desc[A6XX_TEX_CONST_DWORDS];
1335
1335
memcpy (desc, iview->view .descriptor , sizeof (desc));
1336
1336
1337
- enum a6xx_format fmt = blit_format_texture<CHIP>(format, TILE6_LINEAR, false , true ).fmt ;
1337
+ enum a6xx_format fmt =
1338
+ blit_format_texture<CHIP>(format, TILE6_2,
1339
+ iview->view .is_mutable , true ).fmt ;
1338
1340
fixup_src_format (&format, dst_format, &fmt);
1339
1341
1340
1342
/* patch the format so that depth/stencil get the right format and swizzle */
@@ -1348,7 +1350,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
1348
1350
A6XX_TEX_CONST_0_SWIZ_W (A6XX_TEX_W);
1349
1351
1350
1352
/* patched for gmem */
1351
- desc[0 ] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
1353
+ desc[0 ] &= ~A6XX_TEX_CONST_0_TILE_MODE__MASK;
1354
+ if (!iview->view .is_mutable )
1355
+ desc[0 ] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
1352
1356
desc[0 ] |= A6XX_TEX_CONST_0_TILE_MODE (TILE6_2);
1353
1357
desc[2 ] =
1354
1358
A6XX_TEX_CONST_2_TYPE (A6XX_TEX_2D) |
@@ -4856,7 +4860,8 @@ template <chip CHIP>
4856
4860
static void
4857
4861
store_cp_blit (struct tu_cmd_buffer *cmd,
4858
4862
struct tu_cs *cs,
4859
- const struct tu_image_view *iview,
4863
+ const struct tu_image_view *src_iview,
4864
+ const struct tu_image_view *dst_iview,
4860
4865
uint32_t samples,
4861
4866
bool separate_stencil,
4862
4867
enum pipe_format src_format,
@@ -4867,33 +4872,44 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
4867
4872
{
4868
4873
r2d_setup_common<CHIP>(cmd, cs, src_format, dst_format,
4869
4874
VK_IMAGE_ASPECT_COLOR_BIT, 0 , false ,
4870
- iview ->view .ubwc_enabled , true );
4875
+ dst_iview ->view .ubwc_enabled , true );
4871
4876
4872
- if (iview ->image ->vk .format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4877
+ if (dst_iview ->image ->vk .format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4873
4878
if (!separate_stencil) {
4874
- r2d_dst_depth (cs, iview , layer);
4879
+ r2d_dst_depth (cs, dst_iview , layer);
4875
4880
} else {
4876
- r2d_dst_stencil (cs, iview , layer);
4881
+ r2d_dst_stencil (cs, dst_iview , layer);
4877
4882
}
4878
4883
} else {
4879
- r2d_dst<CHIP>(cs, &iview ->view , layer, src_format);
4884
+ r2d_dst<CHIP>(cs, &dst_iview ->view , layer, src_format);
4880
4885
}
4881
4886
4882
- enum a6xx_format fmt = blit_format_texture<CHIP>(src_format, TILE6_2, false , true ).fmt ;
4883
- fixup_src_format (&src_format, dst_format, &fmt);
4887
+ /* Note: we compute the swap here instead of using the color_swap as
4888
+ * programmed when we setup the color attachment because the attachment in
4889
+ * GMEM ignores the swap except when MUTABLEEN is enabled. If the
4890
+ * color attachment is linear, we need to use the identity swap even if the
4891
+ * original attachment has a non-identity swap.
4892
+ */
4893
+ struct tu_native_format fmt =
4894
+ blit_format_texture<CHIP>(src_format, TILE6_2,
4895
+ src_iview->view .is_mutable , true );
4896
+ enum a6xx_format format = fmt.fmt ;
4897
+ fixup_src_format (&src_format, dst_format, &format);
4884
4898
4885
4899
tu_cs_emit_regs (cs,
4886
4900
SP_PS_2D_SRC_INFO (CHIP,
4887
- .color_format = fmt ,
4901
+ .color_format = format ,
4888
4902
.tile_mode = TILE6_2,
4889
- .color_swap = WZYX ,
4903
+ .color_swap = fmt. swap ,
4890
4904
.srgb = util_format_is_srgb (src_format),
4891
4905
.samples = tu_msaa_samples (samples),
4892
4906
.samples_average = !util_format_is_pure_integer (dst_format) &&
4893
4907
!util_format_is_depth_or_stencil (dst_format),
4894
4908
.unk20 = 1 ,
4895
4909
.unk22 = 1 ),
4896
- SP_PS_2D_SRC_SIZE (CHIP, .width = iview->vk .extent .width , .height = iview->vk .extent .height ),
4910
+ SP_PS_2D_SRC_SIZE (CHIP,
4911
+ .width = dst_iview->vk .extent .width ,
4912
+ .height = dst_iview->vk .extent .height ),
4897
4913
SP_PS_2D_SRC (CHIP, .qword = cmd->device ->physical_device ->gmem_base + gmem_offset),
4898
4914
SP_PS_2D_SRC_PITCH (CHIP, .pitch = cmd->state .tiling ->tile0 .width * cpp));
4899
4915
@@ -4921,7 +4937,8 @@ template <chip CHIP>
4921
4937
static void
4922
4938
store_3d_blit (struct tu_cmd_buffer *cmd,
4923
4939
struct tu_cs *cs,
4924
- const struct tu_image_view *iview,
4940
+ const struct tu_image_view *src_iview,
4941
+ const struct tu_image_view *dst_iview,
4925
4942
VkSampleCountFlagBits dst_samples,
4926
4943
bool separate_stencil,
4927
4944
enum pipe_format src_format,
@@ -4949,21 +4966,21 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
4949
4966
}
4950
4967
4951
4968
r3d_setup<CHIP>(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT,
4952
- 0 , false , iview ->view .ubwc_enabled , dst_samples);
4969
+ 0 , false , dst_iview ->view .ubwc_enabled , dst_samples);
4953
4970
4954
4971
r3d_coords (cmd, cs, render_area->offset , render_area->offset , render_area->extent );
4955
4972
4956
- if (iview ->image ->vk .format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4973
+ if (dst_iview ->image ->vk .format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4957
4974
if (!separate_stencil) {
4958
- r3d_dst_depth<CHIP>(cs, iview , layer);
4975
+ r3d_dst_depth<CHIP>(cs, dst_iview , layer);
4959
4976
} else {
4960
- r3d_dst_stencil<CHIP>(cs, iview , layer);
4977
+ r3d_dst_stencil<CHIP>(cs, dst_iview , layer);
4961
4978
}
4962
4979
} else {
4963
- r3d_dst<CHIP>(cs, &iview ->view , layer, src_format);
4980
+ r3d_dst<CHIP>(cs, &dst_iview ->view , layer, src_format);
4964
4981
}
4965
4982
4966
- r3d_src_gmem<CHIP>(cmd, cs, iview , src_format, dst_format, gmem_offset, cpp);
4983
+ r3d_src_gmem<CHIP>(cmd, cs, src_iview , src_format, dst_format, gmem_offset, cpp);
4967
4984
4968
4985
/* sync GMEM writes with CACHE. */
4969
4986
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
@@ -5033,6 +5050,29 @@ tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a)
5033
5050
(y2 % phys_dev->info ->gmem_align_h && need_y2_align));
5034
5051
}
5035
5052
5053
+ /* The fast path cannot handle the corner case where GMEM and sysmem
5054
+ * attachments have different swap if the GMEM attachment is mutable, which
5055
+ * can happen when a mutable color attachment is being resolved into a
5056
+ * non-mutable resolve attachment. In such a case, if the format is a swapped
5057
+ * format like BGRA8, the color attachment will be stored in GMEM swapped but
5058
+ * the resolve attachment in sysmem will not be swapped and there's no way to
5059
+ * express that in the hardware because it computes the GMEM swap from the
5060
+ * sysmem swap.
5061
+ *
+ * Both "a" (destination view) and "gmem_a" (source view) are indices into
+ * cmd->state.attachments. Returns false when they name the same attachment;
+ * otherwise returns true only when the source view is mutable and the two
+ * views' color_swap values differ.
+ */
5062
+ static bool
5063
+ tu_attachment_store_mismatched_swap (struct tu_cmd_buffer *cmd, uint32_t a,
5064
+ uint32_t gmem_a)
5065
+ {
5066
+ if (a == gmem_a) /* same attachment on both sides: a single view cannot mismatch itself */
5067
+ return false ;
5068
+
5069
+ const struct tu_image_view *dst_iview = cmd->state .attachments [a];
5070
+ const struct tu_image_view *src_iview = cmd->state .attachments [gmem_a];
5071
+
5072
+ return src_iview->view .is_mutable && /* only a mutable source view is treated as a possible mismatch */
5073
+ dst_iview->view .color_swap != src_iview->view .color_swap ;
5074
+ }
5075
+
5036
5076
/* Choose the GMEM layout (use the CCU space or not) based on whether the
5037
5077
* current attachments will need. This has to happen at vkBeginRenderPass()
5038
5078
* time because tu_attachment_store_unaligned() looks at the image views, which
@@ -5062,6 +5102,21 @@ tu_choose_gmem_layout(struct tu_cmd_buffer *cmd)
5062
5102
cmd->state .gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
5063
5103
}
5064
5104
5105
+ for (unsigned i = 0 ; i < cmd->state .pass ->subpass_count ; i++) {
5106
+ const struct tu_subpass *subpass = &cmd->state .pass ->subpasses [i];
5107
+ for (unsigned j = 0 ; j < subpass->resolve_count ; j++) {
5108
+ uint32_t a = subpass->resolve_attachments [j].attachment ;
5109
+ if (a == VK_ATTACHMENT_UNUSED)
5110
+ continue ;
5111
+ uint32_t gmem_a =
5112
+ j == subpass->color_count ?
5113
+ subpass->depth_stencil_attachment .attachment :
5114
+ subpass->color_attachments [j].attachment ;
5115
+ if (tu_attachment_store_mismatched_swap (cmd, a, gmem_a))
5116
+ cmd->state .gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
5117
+ }
5118
+ }
5119
+
5065
5120
cmd->state .tiling = &cmd->state .framebuffer ->tiling [cmd->state .gmem_layout ];
5066
5121
}
5067
5122
@@ -5117,8 +5172,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5117
5172
{
5118
5173
const VkRect2D *render_area = &cmd->state .render_area ;
5119
5174
struct tu_render_pass_attachment *dst = &cmd->state .pass ->attachments [a];
5120
- const struct tu_image_view *iview = cmd->state .attachments [a];
5175
+ const struct tu_image_view *dst_iview = cmd->state .attachments [a];
5121
5176
struct tu_render_pass_attachment *src = &cmd->state .pass ->attachments [gmem_a];
5177
+ /* The source view is the GMEM attachment (gmem_a), matching "src" above; it
+ * is a different view from dst_iview whenever this store is a resolve. */
+ const struct tu_image_view *src_iview = cmd->state .attachments [gmem_a];
5122
5178
const VkClearValue *clear_value = &cmd->state .clear_values [gmem_a];
5123
5179
bool resolve = a != gmem_a;
5124
5180
if (resolve)
@@ -5128,6 +5184,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5128
5184
return ;
5129
5185
5130
5186
bool unaligned = tu_attachment_store_unaligned (cmd, a);
5187
+ bool mismatched_swap = tu_attachment_store_mismatched_swap (cmd, a, gmem_a);
5131
5188
5132
5189
/* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
5133
5190
* one for depth and other for stencil. When resolving a MSAA
@@ -5147,7 +5204,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5147
5204
bool store_common = dst->store && !resolve_d32s8_s8;
5148
5205
bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;
5149
5206
5150
- bool use_fast_path = !unaligned && !resolve_d24s8_s8 &&
5207
+ bool use_fast_path = !unaligned && !mismatched_swap && ! resolve_d24s8_s8 &&
5151
5208
(a == gmem_a || blit_can_resolve (dst->format ));
5152
5209
5153
5210
trace_start_gmem_store (&cmd->trace , cs, dst->format , use_fast_path, unaligned);
@@ -5163,9 +5220,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5163
5220
/* use fast path when render area is aligned, except for unsupported resolve cases */
5164
5221
if (use_fast_path) {
5165
5222
if (store_common)
5166
- tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview , src, clear_value, BLIT_EVENT_STORE, false );
5223
+ tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview , src, clear_value, BLIT_EVENT_STORE, false );
5167
5224
if (store_separate_stencil)
5168
- tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview , src, clear_value, BLIT_EVENT_STORE, true );
5225
+ tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview , src, clear_value, BLIT_EVENT_STORE, true );
5169
5226
5170
5227
if (cond_exec) {
5171
5228
tu_end_load_store_cond_exec (cmd, cs, false );
@@ -5198,11 +5255,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5198
5255
5199
5256
for_each_layer (i, layer_mask, layers) {
5200
5257
if (store_common) {
5201
- store_3d_blit<CHIP>(cmd, cs, iview , dst->samples , false , src_format,
5258
+ store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview , dst->samples , false , src_format,
5202
5259
dst_format, render_area, i, tu_attachment_gmem_offset (cmd, src, i), src->cpp );
5203
5260
}
5204
5261
if (store_separate_stencil) {
5205
- store_3d_blit<CHIP>(cmd, cs, iview , dst->samples , true , PIPE_FORMAT_S8_UINT,
5262
+ store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview , dst->samples , true , PIPE_FORMAT_S8_UINT,
5206
5263
PIPE_FORMAT_S8_UINT, render_area, i,
5207
5264
tu_attachment_gmem_offset_stencil (cmd, src, i), src->samples );
5208
5265
}
@@ -5236,11 +5293,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
5236
5293
state);
5237
5294
}
5238
5295
if (store_common) {
5239
- store_cp_blit<CHIP>(cmd, cs, iview , src->samples , false , src_format,
5296
+ store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview , src->samples , false , src_format,
5240
5297
dst_format, i, tu_attachment_gmem_offset (cmd, src, i), src->cpp );
5241
5298
}
5242
5299
if (store_separate_stencil) {
5243
- store_cp_blit<CHIP>(cmd, cs, iview , src->samples , true , PIPE_FORMAT_S8_UINT,
5300
+ store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview , src->samples , true , PIPE_FORMAT_S8_UINT,
5244
5301
PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil (cmd, src, i), src->samples );
5245
5302
}
5246
5303
}
0 commit comments