Skip to content

Commit 4ec6752

Browse files
committed
GPUPresenter: Handle padding for unscaled/slang input
1 parent 5da178e commit 4ec6752

File tree

3 files changed

+167
-83
lines changed

3 files changed

+167
-83
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Other features include:
3333
- Motion adaptive deinterlacing.
3434
- Adaptive downsampling filter.
3535
- Screen rotation for vertical or "TATE" shmup games.
36-
- Post processing shader chains (GLSL and Reshade FX).
36+
- Post processing shader chains (GLSL, Reshade FX and Slang Presets).
3737
- Border overlays/bezels displayed around game content.
3838
- "Fast boot" for skipping BIOS splash/intro.
3939
- Save state support, with runahead and rewind.

src/core/gpu_presenter.cpp

Lines changed: 157 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ void GPUPresenter::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y
428428
}
429429

430430
GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const GSVector2i target_size, bool postfx,
431-
bool apply_aspect_ratio)
431+
bool apply_aspect_ratio) const
432432
{
433433
GL_SCOPE_FMT("RenderDisplay: {}x{}", target_size.x, target_size.y);
434434

@@ -450,9 +450,8 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
450450
GL_INS_FMT("Final target size: {}x{}", target_size.x, target_size.y);
451451

452452
// Compute draw area.
453-
GSVector4i display_rect;
454-
GSVector4i draw_rect;
455-
GSVector4i real_draw_rect;
453+
GSVector4i display_rect, display_rect_without_overlay;
454+
GSVector4i draw_rect, draw_rect_without_overlay;
456455
GSVector4i overlay_display_rect = GSVector4i::zero();
457456
GSVector4i overlay_rect = GSVector4i::zero();
458457
if (have_overlay)
@@ -472,31 +471,56 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
472471

473472
// Draw to the overlay area instead of the whole screen. Always align in center, we align the overlay instead.
474473
CalculateDrawRect(overlay_display_rect.width(), overlay_display_rect.height(), apply_aspect_ratio, integer_scale,
475-
false, &display_rect, &real_draw_rect);
474+
false, &display_rect_without_overlay, &draw_rect_without_overlay);
476475

477476
// Apply overlay area offset.
478-
display_rect = display_rect.add32(overlay_display_rect.xyxy());
479-
draw_rect = real_draw_rect.add32(overlay_display_rect.xyxy());
477+
display_rect = display_rect_without_overlay.add32(overlay_display_rect.xyxy());
478+
draw_rect = draw_rect_without_overlay.add32(overlay_display_rect.xyxy());
480479
}
481480
else
482481
{
483-
CalculateDrawRect(target_size.x, target_size.y, apply_aspect_ratio, integer_scale, true, &display_rect, &draw_rect);
484-
real_draw_rect = draw_rect;
482+
CalculateDrawRect(target_size.x, target_size.y, apply_aspect_ratio, integer_scale, true,
483+
&display_rect_without_overlay, &draw_rect_without_overlay);
484+
display_rect = display_rect_without_overlay;
485+
draw_rect = draw_rect_without_overlay;
485486
}
486487

487488
// There's a bunch of scenarios where we need to use intermediate buffers.
488489
// If we have post-processing and overlays enabled, postfx needs to happen on an intermediate buffer first.
489490
// If pre-rotation is enabled with post-processing, we need to draw to an intermediate buffer, and apply the
490-
// rotation at the end.
491-
const GSVector2i postfx_size = have_overlay ? overlay_display_rect.rsize() : target_size;
492-
const bool really_postfx =
493-
(postfx && !is_vram_view && m_display_postfx && m_display_postfx->IsActive() && m_display_postfx &&
494-
m_display_postfx->CheckTargets(m_display_texture ? m_display_texture->GetFormat() : GPUTexture::Format::Unknown,
495-
m_display_texture_view_width, m_display_texture_view_height, m_present_format,
496-
postfx_size.x, postfx_size.y, m_display_texture ? real_draw_rect.width() : 0,
497-
m_display_texture ? real_draw_rect.height() : 0));
498-
GL_INS(really_postfx ? "Post-processing is ENABLED" : "Post-processing is disabled");
499-
GL_INS_FMT("Post-processing render target size: {}x{}", postfx_size.x, postfx_size.y);
491+
// rotation at the end. Unscaled/slang post-processing applies rotation after post-processing.
492+
bool postfx_active = (postfx && !is_vram_view && m_display_postfx && m_display_postfx->IsActive());
493+
bool postfx_delayed_rotation = false;
494+
if (postfx_active)
495+
{
496+
// Viewport is consistent, but dependent on border overlay.
497+
GSVector2i postfx_source_size = CalculateDisplayPostProcessSourceSize();
498+
GSVector2i postfx_viewport_size = display_rect.rsize();
499+
GSVector2i postfx_target_size = (have_overlay ? overlay_display_rect.rsize() : target_size);
500+
501+
// If we're using unscaled post-processing, then we do the post-processing without rotation and apply it later.
502+
if (m_display_postfx->WantsUnscaledInput() &&
503+
(postfx_delayed_rotation = (g_gpu_settings.display_rotation == DisplayRotation::Rotate90 ||
504+
g_gpu_settings.display_rotation == DisplayRotation::Rotate270)))
505+
{
506+
postfx_target_size = postfx_target_size.yx();
507+
postfx_viewport_size = postfx_viewport_size.yx();
508+
}
509+
510+
// This could fail if we run out of VRAM.
511+
if ((postfx_active = m_display_postfx->CheckTargets(
512+
m_display_texture ? m_display_texture->GetFormat() : GPUTexture::Format::Unknown, postfx_source_size.x,
513+
postfx_source_size.y, m_present_format, postfx_target_size.x, postfx_target_size.y, postfx_viewport_size.x,
514+
postfx_viewport_size.y)))
515+
{
516+
GL_INS("Post-processing is ACTIVE this frame");
517+
GL_INS_FMT("Post-processing source size: {}x{}", postfx_source_size.x, postfx_source_size.y);
518+
GL_INS_FMT("Post-processing target size: {}x{}", postfx_target_size.x, postfx_target_size.y);
519+
GL_INS_FMT("Post-processing viewport size: {}x{}", postfx_viewport_size.x, postfx_viewport_size.y);
520+
GL_INS_FMT("Post-processing input texture size: {}x{}", m_display_postfx->GetInputTexture()->GetWidth(),
521+
m_display_postfx->GetInputTexture()->GetHeight());
522+
}
523+
}
500524

501525
// Helper to bind swap chain/final target.
502526
const auto bind_final_target = [&target, &swap_chain, &final_target_size](bool clear) {
@@ -520,63 +544,20 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
520544
};
521545

522546
// If postfx is enabled, we need to draw to an intermediate buffer first.
523-
if (really_postfx)
547+
if (postfx_active)
524548
{
525549
// Display is always drawn to the postfx input.
526-
GPUTexture* postfx_input;
527-
if (!m_display_postfx->WantsUnscaledInput() || !m_display_texture)
528-
{
529-
postfx_input = m_display_postfx->GetInputTexture();
530-
g_gpu_device->ClearRenderTarget(postfx_input, GPUDevice::DEFAULT_CLEAR_COLOR);
531-
g_gpu_device->SetRenderTarget(postfx_input);
532-
g_gpu_device->SetViewport(GSVector4i::loadh(postfx_size));
533-
if (m_display_texture)
534-
{
535-
DrawDisplay(postfx_size, postfx_size, real_draw_rect, false, g_gpu_settings.display_rotation,
536-
WindowInfo::PreRotation::Identity);
537-
}
538-
}
539-
else
540-
{
541-
// TODO: If there's padding, it needs to be applied here.
542-
// TODO: If rotating, we need to apply it here too.
543-
postfx_input = m_display_texture;
544-
if (g_gpu_device->UsesLowerLeftOrigin() || g_settings.display_rotation != DisplayRotation::Normal)
545-
{
546-
// OpenGL needs to flip the correct way around.
547-
const GSVector2i input_size = GSVector2i(m_display_texture_view_width, m_display_texture_view_height);
548-
const GSVector4 src_uv_rect = GSVector4(GSVector4i(m_display_texture_view_x, m_display_texture_view_y,
549-
m_display_texture_view_x + m_display_texture_view_width,
550-
m_display_texture_view_y + m_display_texture_view_height)) /
551-
GSVector4::xyxy(GSVector2(m_display_texture->GetSizeVec()));
552-
553-
postfx_input = m_display_postfx->GetInputTexture();
554-
m_display_texture->MakeReadyForSampling();
555-
g_gpu_device->SetRenderTarget(postfx_input);
556-
g_gpu_device->SetViewportAndScissor(GSVector4i::loadh(input_size));
557-
g_gpu_device->SetPipeline(m_present_copy_pipeline.get());
558-
g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler());
559-
DrawScreenQuad(GSVector4i::loadh(input_size), src_uv_rect, input_size, input_size, g_settings.display_rotation,
560-
prerotation, nullptr, 0);
561-
}
562-
else if (m_display_texture_view_x != 0 || m_display_texture_view_y != 0 ||
563-
m_display_texture->GetWidth() != static_cast<u32>(m_display_texture_view_width) ||
564-
m_display_texture->GetHeight() != static_cast<u32>(m_display_texture_view_height))
565-
{
566-
postfx_input = m_display_postfx->GetInputTexture();
567-
g_gpu_device->CopyTextureRegion(postfx_input, 0, 0, 0, 0, m_display_texture, m_display_texture_view_x,
568-
m_display_texture_view_y, 0, 0, m_display_texture_view_width,
569-
m_display_texture_view_height);
570-
}
571-
}
572-
550+
GPUTexture* postfx_input = GetDisplayPostProcessInputTexture(
551+
draw_rect_without_overlay, postfx_delayed_rotation ? DisplayRotation::Normal : g_gpu_settings.display_rotation);
573552
postfx_input->MakeReadyForSampling();
574553

575554
// Apply postprocessing to an intermediate texture if we're prerotating or have an overlay.
576-
if (have_prerotation || have_overlay)
555+
if (have_prerotation || have_overlay || postfx_delayed_rotation)
577556
{
578557
GPUTexture* const postfx_output = m_display_postfx->GetTextureUnusedAtEndOfChain();
579-
ApplyDisplayPostProcess(postfx_output, postfx_input, real_draw_rect, postfx_size);
558+
const GSVector4i postfx_final_rect =
559+
postfx_delayed_rotation ? display_rect_without_overlay.yxwz() : display_rect_without_overlay;
560+
ApplyDisplayPostProcess(postfx_output, postfx_input, postfx_final_rect, postfx_output->GetSizeVec());
580561
postfx_output->MakeReadyForSampling();
581562

582563
// Start draw to final buffer.
@@ -588,6 +569,8 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
588569
GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f);
589570

590571
// If we have an overlay, draw it, and then copy the postprocessed framebuffer in.
572+
const DisplayRotation present_rotation =
573+
postfx_delayed_rotation ? g_gpu_settings.display_rotation : DisplayRotation::Normal;
591574
if (have_overlay)
592575
{
593576
GL_SCOPE_FMT("Draw overlay and postfx buffer");
@@ -598,17 +581,17 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
598581

599582
g_gpu_device->SetPipeline(m_present_copy_blend_pipeline.get());
600583
g_gpu_device->SetTextureSampler(0, postfx_output, g_gpu_device->GetNearestSampler());
601-
DrawScreenQuad(overlay_display_rect, src_uv_rect, target_size, final_target_size, DisplayRotation::Normal,
602-
prerotation, nullptr, 0);
584+
DrawScreenQuad(overlay_display_rect, src_uv_rect, target_size, final_target_size, present_rotation, prerotation,
585+
nullptr, 0);
603586
}
604587
else
605588
{
606589
// Otherwise, just copy the framebuffer.
607590
GL_SCOPE_FMT("Copy framebuffer for prerotation");
608591
g_gpu_device->SetPipeline(m_present_copy_pipeline.get());
609592
g_gpu_device->SetTextureSampler(0, postfx_output, g_gpu_device->GetNearestSampler());
610-
DrawScreenQuad(GSVector4i::loadh(postfx_size), src_uv_rect, target_size, final_target_size,
611-
DisplayRotation::Normal, prerotation, nullptr, 0);
593+
DrawScreenQuad(GSVector4i::loadh(target_size), src_uv_rect, target_size, final_target_size, present_rotation,
594+
prerotation, nullptr, 0);
612595
}
613596

614597
// All done
@@ -617,7 +600,7 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
617600
else
618601
{
619602
// Otherwise apply postprocessing directly to swap chain.
620-
return ApplyDisplayPostProcess(target, postfx_input, display_rect, postfx_size);
603+
return ApplyDisplayPostProcess(target, postfx_input, display_rect, target_size);
621604
}
622605
}
623606
else
@@ -655,7 +638,7 @@ GPUDevice::PresentResult GPUPresenter::RenderDisplay(GPUTexture* target, const G
655638

656639
void GPUPresenter::DrawOverlayBorders(const GSVector2i target_size, const GSVector2i final_target_size,
657640
const GSVector4i overlay_display_rect, const GSVector4i draw_rect,
658-
const WindowInfo::PreRotation prerotation)
641+
const WindowInfo::PreRotation prerotation) const
659642
{
660643
GL_SCOPE_FMT("Fill in overlay borders - odisplay={}, draw={}", overlay_display_rect, draw_rect);
661644

@@ -727,7 +710,7 @@ void GPUPresenter::DrawOverlayBorders(const GSVector2i target_size, const GSVect
727710

728711
void GPUPresenter::DrawDisplay(const GSVector2i target_size, const GSVector2i final_target_size,
729712
const GSVector4i display_rect, bool dst_alpha_blend, DisplayRotation rotation,
730-
WindowInfo::PreRotation prerotation)
713+
WindowInfo::PreRotation prerotation) const
731714
{
732715
bool texture_filter_linear = false;
733716

@@ -865,9 +848,106 @@ void GPUPresenter::DrawScreenQuad(const GSVector4i rect, const GSVector4 uv_rect
865848
g_gpu_device->Draw(4, base_vertex);
866849
}
867850

851+
// Computes the "source" size handed to the post-processing chain; RenderDisplay() forwards the
// result as the source width/height arguments of CheckTargets().
// Requires m_display_postfx to be non-null (debug-asserted below).
// Returns either the display texture view size, or (for chains that want unscaled input) the
// full display size multiplied by the texture-view/VRAM-size ratio, rounded up.
GSVector2i GPUPresenter::CalculateDisplayPostProcessSourceSize() const
852+
{
853+
DebugAssert(m_display_postfx);
854+
855+
// Easy case: the chain does not want unscaled input.
856+
if (!m_display_postfx->WantsUnscaledInput())
857+
{
858+
// Render to an input texture that's viewport sized. Source is the "real" input texture.
859+
return GSVector2i(m_display_texture_view_width, m_display_texture_view_height);
860+
}
861+
else
862+
{
863+
// Need to include the borders in the size. This is very janky, since we need to correct for upscaling.
864+
// Source and input is the full display texture size (including padding).
// NOTE(review): m_display_vram_width/height are presumably the native-resolution size of the
// region covered by the texture view, making `scale` the upscale factor — confirm.
865+
const GSVector2i input_size = GSVector2i(m_display_texture_view_width, m_display_texture_view_height);
866+
const GSVector2i native_size = GSVector2i(m_display_vram_width, m_display_vram_height);
867+
const GSVector2i native_display_size = GSVector2i(m_display_width, m_display_height);
868+
const GSVector2 scale = GSVector2(input_size) / GSVector2(native_size);
// Round up so a fractional scaled size never drops a partial pixel.
869+
return GSVector2i((GSVector2(native_display_size) * scale).ceil());
870+
}
871+
}
872+
873+
// Prepares and returns the texture that is fed into the post-processing chain.
//
// draw_rect_without_overlay: rectangle passed to DrawDisplay() when the display is rendered into
//                            the chain's input texture (first path below).
// rotation:                  rotation forwarded to DrawDisplay()/DrawScreenQuad() while filling
//                            the input texture; pre-rotation is always Identity here.
//
// Returns the chain's own input texture when the display had to be rendered or copied into it,
// otherwise m_display_texture itself (pass-through). Although the method is const, it issues GPU
// commands (clear, render target/viewport/pipeline binds, draws, copies) through g_gpu_device.
// Requires m_display_postfx to be non-null (debug-asserted below).
GPUTexture* GPUPresenter::GetDisplayPostProcessInputTexture(const GSVector4i draw_rect_without_overlay,
874+
DisplayRotation rotation) const
875+
{
876+
DebugAssert(m_display_postfx);
877+
878+
GPUTexture* postfx_input;
879+
// Path 1: the chain does not want unscaled input, or there is no display texture at all.
if (!m_display_postfx->WantsUnscaledInput() || !m_display_texture)
880+
{
881+
// Render to postfx input as if it was the final display.
882+
postfx_input = m_display_postfx->GetInputTexture();
883+
// The input texture is cleared even when there is nothing to draw into it.
g_gpu_device->ClearRenderTarget(postfx_input, GPUDevice::DEFAULT_CLEAR_COLOR);
884+
if (m_display_texture)
885+
{
886+
const GSVector2i postfx_input_size = postfx_input->GetSizeVec();
887+
g_gpu_device->SetRenderTarget(postfx_input);
888+
g_gpu_device->SetViewport(GSVector4i::loadh(postfx_input_size));
889+
890+
DrawDisplay(postfx_input_size, postfx_input_size, draw_rect_without_overlay, false, rotation,
891+
WindowInfo::PreRotation::Identity);
892+
}
893+
}
894+
else
895+
{
// Path 2: unscaled input. Start by assuming the display texture can be used directly.
896+
postfx_input = m_display_texture;
897+
898+
// OpenGL needs to flip the correct way around. If the source is exactly the same size without
899+
// any correction, we can pass it through to the chain directly.
// A redraw is also needed when a rotation is requested, the display origin is non-zero,
// or the texture view size differs from m_display_vram_width/height.
900+
if (g_gpu_device->UsesLowerLeftOrigin() || rotation != DisplayRotation::Normal || m_display_origin_left != 0 ||
901+
m_display_origin_top != 0 || m_display_vram_width != m_display_texture_view_width ||
902+
m_display_vram_height != m_display_texture_view_height)
903+
{
904+
GL_SCOPE_FMT("Pre-process postfx source");
905+
906+
const GSVector2i input_size = GSVector2i(m_display_texture_view_width, m_display_texture_view_height);
907+
const GSVector2i native_size = GSVector2i(m_display_vram_width, m_display_vram_height);
908+
const GSVector2 input_scale = GSVector2(input_size) / GSVector2(native_size);
// Destination rectangle inside the input texture: the display origin/size rectangle scaled by
// input_scale and rounded down.
909+
const GSVector4i input_draw_rect = GSVector4i(
910+
(GSVector4(GSVector4i(m_display_origin_left, m_display_origin_top, m_display_origin_left + m_display_vram_width,
911+
m_display_origin_top + m_display_vram_height)) *
912+
GSVector4::xyxy(input_scale))
913+
.floor());
914+
915+
// Source UVs: the texture view rectangle divided by the full display texture size.
const GSVector4 src_uv_rect = GSVector4(GSVector4i(m_display_texture_view_x, m_display_texture_view_y,
916+
m_display_texture_view_x + m_display_texture_view_width,
917+
m_display_texture_view_y + m_display_texture_view_height)) /
918+
GSVector4::xyxy(GSVector2(m_display_texture->GetSizeVec()));
919+
920+
postfx_input = m_display_postfx->GetInputTexture();
921+
m_display_texture->MakeReadyForSampling();
922+
923+
const GSVector2i postfx_input_size = postfx_input->GetSizeVec();
// Clear first so that any area outside input_draw_rect holds the default clear colour.
924+
g_gpu_device->ClearRenderTarget(postfx_input, GPUDevice::DEFAULT_CLEAR_COLOR);
925+
g_gpu_device->SetRenderTarget(postfx_input);
926+
g_gpu_device->SetViewportAndScissor(GSVector4i::loadh(postfx_input_size));
927+
g_gpu_device->SetPipeline(m_present_copy_pipeline.get());
928+
g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler());
929+
DrawScreenQuad(input_draw_rect, src_uv_rect, postfx_input_size, postfx_input_size, rotation,
930+
WindowInfo::PreRotation::Identity, nullptr, 0);
931+
}
932+
// No redraw needed, but the view covers only part of the display texture: copy just that region.
else if (m_display_texture_view_x != 0 || m_display_texture_view_y != 0 ||
933+
m_display_texture->GetWidth() != static_cast<u32>(m_display_texture_view_width) ||
934+
m_display_texture->GetHeight() != static_cast<u32>(m_display_texture_view_height))
935+
{
936+
GL_SCOPE_FMT("Copy postfx source");
937+
938+
postfx_input = m_display_postfx->GetInputTexture();
939+
g_gpu_device->CopyTextureRegion(postfx_input, 0, 0, 0, 0, m_display_texture, m_display_texture_view_x,
940+
m_display_texture_view_y, 0, 0, m_display_texture_view_width,
941+
m_display_texture_view_height);
942+
}
// Otherwise postfx_input remains m_display_texture and is passed through untouched.
943+
}
944+
945+
return postfx_input;
946+
}
947+
868948
GPUDevice::PresentResult GPUPresenter::ApplyDisplayPostProcess(GPUTexture* target, GPUTexture* input,
869949
const GSVector4i display_rect,
870-
const GSVector2i postfx_size)
950+
const GSVector2i postfx_size) const
871951
{
872952
DebugAssert(!g_gpu_settings.gpu_show_vram);
873953

0 commit comments

Comments
 (0)