Skip to content

Commit a104a7c

Browse files
cwabbott0 authored and Marge Bot committed
tu: Handle non-identity GMEM swaps when resolving
There is a single swap field for each color attachment, regardless of whether it's in GMEM or not, and this does appear to be used in GMEM mode when MUTABLEEN is set on the attachment. This means that when a color attachment has a non-identity swap because it's mutable on a750, we have to use the corresponding swap when it's a source in a GMEM resolve. When using the fastpath, we have to make sure that the swaps match because there aren't separate fields for GMEM and sysmem swap.

This fixes dEQP-VK.image.mutable.2d.*_b8g8r8a8_unorm_draw_copy_resolve with TU_DEBUG=gmem.

Fixes: 247d11d ("tu: Allow UBWC with images with swapped formats.")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33115>
1 parent 450755b commit a104a7c

File tree

3 files changed

+89
-28
lines changed

3 files changed

+89
-28
lines changed

src/freedreno/fdl/fd6_view.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,8 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
402402
tile_mode == TILE6_LINEAR && args->base_miplevel != layout->mip_levels - 1;
403403

404404
view->ubwc_enabled = ubwc_enabled;
405+
view->is_mutable = layout->is_mutable;
406+
view->color_swap = color_swap;
405407

406408
view->RB_MRT_BUF_INFO =
407409
A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |

src/freedreno/fdl/freedreno_layout.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,8 @@ struct fdl6_view {
297297
bool need_y2_align;
298298

299299
bool ubwc_enabled;
300+
bool is_mutable;
301+
uint8_t color_swap;
300302

301303
enum pipe_format format;
302304

src/freedreno/vulkan/tu_clear_blit.cc

Lines changed: 85 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1334,7 +1334,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
13341334
uint32_t desc[A6XX_TEX_CONST_DWORDS];
13351335
memcpy(desc, iview->view.descriptor, sizeof(desc));
13361336

1337-
enum a6xx_format fmt = blit_format_texture<CHIP>(format, TILE6_LINEAR, false, true).fmt;
1337+
enum a6xx_format fmt =
1338+
blit_format_texture<CHIP>(format, TILE6_2,
1339+
iview->view.is_mutable, true).fmt;
13381340
fixup_src_format(&format, dst_format, &fmt);
13391341

13401342
/* patch the format so that depth/stencil get the right format and swizzle */
@@ -1348,7 +1350,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
13481350
A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
13491351

13501352
/* patched for gmem */
1351-
desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
1353+
desc[0] &= ~A6XX_TEX_CONST_0_TILE_MODE__MASK;
1354+
if (!iview->view.is_mutable)
1355+
desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
13521356
desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
13531357
desc[2] =
13541358
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
@@ -4856,7 +4860,8 @@ template <chip CHIP>
48564860
static void
48574861
store_cp_blit(struct tu_cmd_buffer *cmd,
48584862
struct tu_cs *cs,
4859-
const struct tu_image_view *iview,
4863+
const struct tu_image_view *src_iview,
4864+
const struct tu_image_view *dst_iview,
48604865
uint32_t samples,
48614866
bool separate_stencil,
48624867
enum pipe_format src_format,
@@ -4867,33 +4872,44 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
48674872
{
48684873
r2d_setup_common<CHIP>(cmd, cs, src_format, dst_format,
48694874
VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
4870-
iview->view.ubwc_enabled, true);
4875+
dst_iview->view.ubwc_enabled, true);
48714876

4872-
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4877+
if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
48734878
if (!separate_stencil) {
4874-
r2d_dst_depth(cs, iview, layer);
4879+
r2d_dst_depth(cs, dst_iview, layer);
48754880
} else {
4876-
r2d_dst_stencil(cs, iview, layer);
4881+
r2d_dst_stencil(cs, dst_iview, layer);
48774882
}
48784883
} else {
4879-
r2d_dst<CHIP>(cs, &iview->view, layer, src_format);
4884+
r2d_dst<CHIP>(cs, &dst_iview->view, layer, src_format);
48804885
}
48814886

4882-
enum a6xx_format fmt = blit_format_texture<CHIP>(src_format, TILE6_2, false, true).fmt;
4883-
fixup_src_format(&src_format, dst_format, &fmt);
4887+
/* Note: we compute the swap here instead of using the color_swap as
4888+
* programmed when we setup the color attachment because the attachment in
4889+
* GMEM ignores the swap except when MUTABLEEN is enabled. If the
4890+
* color attachment is linear, we need to use the identity swap even if the
4891+
* original attachment has a non-identity swap.
4892+
*/
4893+
struct tu_native_format fmt =
4894+
blit_format_texture<CHIP>(src_format, TILE6_2,
4895+
src_iview->view.is_mutable, true);
4896+
enum a6xx_format format = fmt.fmt;
4897+
fixup_src_format(&src_format, dst_format, &format);
48844898

48854899
tu_cs_emit_regs(cs,
48864900
SP_PS_2D_SRC_INFO(CHIP,
4887-
.color_format = fmt,
4901+
.color_format = format,
48884902
.tile_mode = TILE6_2,
4889-
.color_swap = WZYX,
4903+
.color_swap = fmt.swap,
48904904
.srgb = util_format_is_srgb(src_format),
48914905
.samples = tu_msaa_samples(samples),
48924906
.samples_average = !util_format_is_pure_integer(dst_format) &&
48934907
!util_format_is_depth_or_stencil(dst_format),
48944908
.unk20 = 1,
48954909
.unk22 = 1),
4896-
SP_PS_2D_SRC_SIZE(CHIP, .width = iview->vk.extent.width, .height = iview->vk.extent.height),
4910+
SP_PS_2D_SRC_SIZE(CHIP,
4911+
.width = dst_iview->vk.extent.width,
4912+
.height = dst_iview->vk.extent.height),
48974913
SP_PS_2D_SRC(CHIP, .qword = cmd->device->physical_device->gmem_base + gmem_offset),
48984914
SP_PS_2D_SRC_PITCH(CHIP, .pitch = cmd->state.tiling->tile0.width * cpp));
48994915

@@ -4921,7 +4937,8 @@ template <chip CHIP>
49214937
static void
49224938
store_3d_blit(struct tu_cmd_buffer *cmd,
49234939
struct tu_cs *cs,
4924-
const struct tu_image_view *iview,
4940+
const struct tu_image_view *src_iview,
4941+
const struct tu_image_view *dst_iview,
49254942
VkSampleCountFlagBits dst_samples,
49264943
bool separate_stencil,
49274944
enum pipe_format src_format,
@@ -4949,21 +4966,21 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
49494966
}
49504967

49514968
r3d_setup<CHIP>(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT,
4952-
0, false, iview->view.ubwc_enabled, dst_samples);
4969+
0, false, dst_iview->view.ubwc_enabled, dst_samples);
49534970

49544971
r3d_coords(cmd, cs, render_area->offset, render_area->offset, render_area->extent);
49554972

4956-
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
4973+
if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
49574974
if (!separate_stencil) {
4958-
r3d_dst_depth<CHIP>(cs, iview, layer);
4975+
r3d_dst_depth<CHIP>(cs, dst_iview, layer);
49594976
} else {
4960-
r3d_dst_stencil<CHIP>(cs, iview, layer);
4977+
r3d_dst_stencil<CHIP>(cs, dst_iview, layer);
49614978
}
49624979
} else {
4963-
r3d_dst<CHIP>(cs, &iview->view, layer, src_format);
4980+
r3d_dst<CHIP>(cs, &dst_iview->view, layer, src_format);
49644981
}
49654982

4966-
r3d_src_gmem<CHIP>(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
4983+
r3d_src_gmem<CHIP>(cmd, cs, src_iview, src_format, dst_format, gmem_offset, cpp);
49674984

49684985
/* sync GMEM writes with CACHE. */
49694986
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
@@ -5033,6 +5050,29 @@ tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a)
50335050
(y2 % phys_dev->info->gmem_align_h && need_y2_align));
50345051
}
50355052

5053+
/* The fast path cannot handle the corner case where GMEM and sysmem
5054+
* attachments have different swap if the GMEM attachment is mutable, which
5055+
* can happen when a mutable color attachment is being resolved into a
5056+
* non-mutable resolve attachment. In such a case, if the format is a swapped
5057+
* format like BGRA8, the color attachment will be stored in GMEM swapped but
5058+
* the resolve attachment in sysmem will not be swapped and there's no way to
5059+
* express that in the hardware because it computes the GMEM swap from the
5060+
* sysmem swap.
5061+
*/
5062+
static bool
5063+
tu_attachment_store_mismatched_swap(struct tu_cmd_buffer *cmd, uint32_t a,
5064+
uint32_t gmem_a)
5065+
{
5066+
if (a == gmem_a)
5067+
return false;
5068+
5069+
const struct tu_image_view *dst_iview = cmd->state.attachments[a];
5070+
const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a];
5071+
5072+
return src_iview->view.is_mutable &&
5073+
dst_iview->view.color_swap != src_iview->view.color_swap;
5074+
}
5075+
50365076
/* Choose the GMEM layout (use the CCU space or not) based on whether the
50375077
* current attachments will need it. This has to happen at vkBeginRenderPass()
50385078
* time because tu_attachment_store_unaligned() looks at the image views, which
@@ -5062,6 +5102,21 @@ tu_choose_gmem_layout(struct tu_cmd_buffer *cmd)
50625102
cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
50635103
}
50645104

5105+
for (unsigned i = 0; i < cmd->state.pass->subpass_count; i++) {
5106+
const struct tu_subpass *subpass = &cmd->state.pass->subpasses[i];
5107+
for (unsigned j = 0; j < subpass->resolve_count; j++) {
5108+
uint32_t a = subpass->resolve_attachments[j].attachment;
5109+
if (a == VK_ATTACHMENT_UNUSED)
5110+
continue;
5111+
uint32_t gmem_a =
5112+
j == subpass->color_count ?
5113+
subpass->depth_stencil_attachment.attachment :
5114+
subpass->color_attachments[j].attachment;
5115+
if (tu_attachment_store_mismatched_swap(cmd, a, gmem_a))
5116+
cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
5117+
}
5118+
}
5119+
50655120
cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
50665121
}
50675122

@@ -5117,8 +5172,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
51175172
{
51185173
const VkRect2D *render_area = &cmd->state.render_area;
51195174
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
5120-
const struct tu_image_view *iview = cmd->state.attachments[a];
5175+
const struct tu_image_view *dst_iview = cmd->state.attachments[a];
51215176
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
5177+
const struct tu_image_view *src_iview = cmd->state.attachments[a];
51225178
const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a];
51235179
bool resolve = a != gmem_a;
51245180
if (resolve)
@@ -5128,6 +5184,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
51285184
return;
51295185

51305186
bool unaligned = tu_attachment_store_unaligned(cmd, a);
5187+
bool mismatched_swap = tu_attachment_store_mismatched_swap(cmd, a, gmem_a);
51315188

51325189
/* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
51335190
* one for depth and other for stencil. When resolving a MSAA
@@ -5147,7 +5204,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
51475204
bool store_common = dst->store && !resolve_d32s8_s8;
51485205
bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;
51495206

5150-
bool use_fast_path = !unaligned && !resolve_d24s8_s8 &&
5207+
bool use_fast_path = !unaligned && !mismatched_swap && !resolve_d24s8_s8 &&
51515208
(a == gmem_a || blit_can_resolve(dst->format));
51525209

51535210
trace_start_gmem_store(&cmd->trace, cs, dst->format, use_fast_path, unaligned);
@@ -5163,9 +5220,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
51635220
/* use fast path when render area is aligned, except for unsupported resolve cases */
51645221
if (use_fast_path) {
51655222
if (store_common)
5166-
tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, false);
5223+
tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, false);
51675224
if (store_separate_stencil)
5168-
tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, true);
5225+
tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, true);
51695226

51705227
if (cond_exec) {
51715228
tu_end_load_store_cond_exec(cmd, cs, false);
@@ -5198,11 +5255,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
51985255

51995256
for_each_layer(i, layer_mask, layers) {
52005257
if (store_common) {
5201-
store_3d_blit<CHIP>(cmd, cs, iview, dst->samples, false, src_format,
5258+
store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview, dst->samples, false, src_format,
52025259
dst_format, render_area, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
52035260
}
52045261
if (store_separate_stencil) {
5205-
store_3d_blit<CHIP>(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
5262+
store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
52065263
PIPE_FORMAT_S8_UINT, render_area, i,
52075264
tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
52085265
}
@@ -5236,11 +5293,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
52365293
state);
52375294
}
52385295
if (store_common) {
5239-
store_cp_blit<CHIP>(cmd, cs, iview, src->samples, false, src_format,
5296+
store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview, src->samples, false, src_format,
52405297
dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
52415298
}
52425299
if (store_separate_stencil) {
5243-
store_cp_blit<CHIP>(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
5300+
store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview, src->samples, true, PIPE_FORMAT_S8_UINT,
52445301
PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
52455302
}
52465303
}

0 commit comments

Comments
 (0)