From 1daa60c64db02dde54f8c796775bf279ffcaccba Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 7 Jan 2025 23:20:14 +1000 Subject: [PATCH] GPU: Use tristrips for presentation as well --- src/core/gpu_backend.cpp | 117 +++++++++++++++++++------------- src/core/gpu_shadergen.cpp | 17 ++--- src/core/shader_cache_version.h | 2 +- 3 files changed, 77 insertions(+), 59 deletions(-) diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index 125e7f188..458b867ef 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -535,7 +535,6 @@ bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool ch g_gpu_device->GetFeatures().framebuffer_fetch); GPUPipeline::GraphicsConfig plconfig; - plconfig.input_layout.vertex_stride = 0; plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); @@ -548,6 +547,8 @@ bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool ch if (display) { + SetScreenQuadInputLayout(plconfig); + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.SetTargetFormats(g_gpu_device->HasMainSwapChain() ? g_gpu_device->GetMainSwapChain()->GetFormat() : GPUTexture::Format::RGBA8); @@ -589,6 +590,9 @@ bool GPUBackend::CompileDisplayPipelines(bool display, bool deinterlace, bool ch Settings::GetDisplayScalingName(g_gpu_settings.display_scaling)); } + plconfig.input_layout = {}; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + if (deinterlace) { std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), @@ -841,14 +845,12 @@ GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSV const bool really_postfx = (postfx && PostProcessing::DisplayChain.IsActive() && g_gpu_device->HasMainSwapChain() && hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); + const u32 real_target_width = + (target || really_postfx) ? target_width : g_gpu_device->GetMainSwapChain()->GetPostRotatedWidth(); + const u32 real_target_height = + (target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight(); GSVector4i real_draw_rect = (target || really_postfx) ? draw_rect : g_gpu_device->GetMainSwapChain()->PreRotateClipRect(draw_rect); - if (g_gpu_device->UsesLowerLeftOrigin()) - { - real_draw_rect = GPUDevice::FlipToLowerLeft( - real_draw_rect, - (target || really_postfx) ? target_height : g_gpu_device->GetMainSwapChain()->GetPostRotatedHeight()); - } if (really_postfx) { g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); @@ -872,13 +874,11 @@ GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSV { bool texture_filter_linear = false; - struct Uniforms + struct alignas(16) Uniforms { - float src_rect[4]; float src_size[4]; float clamp_rect[4]; float params[4]; - float rotation_matrix[2][2]; } uniforms; std::memset(uniforms.params, 0, sizeof(uniforms.params)); @@ -916,50 +916,75 @@ GPUDevice::PresentResult GPUBackend::RenderDisplay(GPUTexture* target, const GSV // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. - const float rcp_width = 1.0f / static_cast(display_texture->GetWidth()); - const float rcp_height = 1.0f / static_cast(display_texture->GetHeight()); - uniforms.src_rect[0] = static_cast(display_texture_view_x) * rcp_width; - uniforms.src_rect[1] = static_cast(display_texture_view_y) * rcp_height; - uniforms.src_rect[2] = static_cast(display_texture_view_width) * rcp_width; - uniforms.src_rect[3] = static_cast(display_texture_view_height) * rcp_height; - uniforms.clamp_rect[0] = (static_cast(display_texture_view_x) + 0.5f) * rcp_width; - uniforms.clamp_rect[1] = (static_cast(display_texture_view_y) + 0.5f) * rcp_height; - uniforms.clamp_rect[2] = - (static_cast(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; - uniforms.clamp_rect[3] = - (static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; - uniforms.src_size[0] = static_cast(display_texture->GetWidth()); - uniforms.src_size[1] = static_cast(display_texture->GetHeight()); - uniforms.src_size[2] = rcp_width; - uniforms.src_size[3] = rcp_height; + const GSVector2 display_texture_size = GSVector2(display_texture->GetSizeVec()); + const GSVector4 display_texture_size4 = GSVector4::xyxy(display_texture_size); + const GSVector4 uv_rect = GSVector4(GSVector4i(display_texture_view_x, display_texture_view_y, + display_texture_view_x + display_texture_view_width, + display_texture_view_y + display_texture_view_height)) / + display_texture_size4; + GSVector4::store(uniforms.clamp_rect, + GSVector4(static_cast(display_texture_view_x) + 0.5f, + static_cast(display_texture_view_y) + 0.5f, + static_cast(display_texture_view_x + display_texture_view_width) - 0.5f, + static_cast(display_texture_view_y + display_texture_view_height) - 0.5f) / + display_texture_size4); + GSVector4::store(uniforms.src_size, + GSVector4::xyxy(display_texture_size, GSVector2::cxpr(1.0f) / display_texture_size)); + + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + + g_gpu_device->SetViewport(0, 0, real_target_width, real_target_height); + g_gpu_device->SetScissor(g_gpu_device->UsesLowerLeftOrigin() ? + GPUDevice::FlipToLowerLeft(real_draw_rect, real_target_height) : + real_draw_rect); + + ScreenVertex* vertices; + u32 space; + u32 base_vertex; + g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast(&vertices), &space, &base_vertex); const WindowInfo::PreRotation surface_prerotation = (target || really_postfx) ? WindowInfo::PreRotation::Identity : g_gpu_device->GetMainSwapChain()->GetPreRotation(); - if (g_gpu_settings.display_rotation != DisplayRotation::Normal || - surface_prerotation != WindowInfo::PreRotation::Identity) - { - static constexpr const std::array(DisplayRotation::Count)> rotation_radians = {{ - 0.0f, - static_cast(std::numbers::pi * 1.5f), // Rotate90 - static_cast(std::numbers::pi), // Rotate180 - static_cast(std::numbers::pi / 2.0), // Rotate270 - }}; - const u32 rotation_idx = - (static_cast(g_gpu_settings.display_rotation) + static_cast(surface_prerotation)) % - static_cast(rotation_radians.size()); - GSMatrix2x2::Rotation(rotation_radians[rotation_idx]).store(uniforms.rotation_matrix); - } - else + const DisplayRotation uv_rotation = static_cast( + (static_cast(g_gpu_settings.display_rotation) + static_cast(surface_prerotation)) % + static_cast(DisplayRotation::Count)); + + const GSVector4 xy = + GetScreenQuadClipSpaceCoordinates(real_draw_rect, GSVector2i(real_target_width, real_target_height)); + switch (uv_rotation) { - GSMatrix2x2::Identity().store(uniforms.rotation_matrix); + case DisplayRotation::Normal: + vertices[0].Set(xy.xy(), uv_rect.xy()); + vertices[1].Set(xy.zyzw().xy(), uv_rect.zyzw().xy()); + vertices[2].Set(xy.xwzw().xy(), uv_rect.xwzw().xy()); + vertices[3].Set(xy.zw(), uv_rect.zw()); + break; + case DisplayRotation::Rotate90: + vertices[0].Set(xy.xy(), uv_rect.xwzw().xy()); + vertices[1].Set(xy.zyzw().xy(), uv_rect.xy()); + vertices[2].Set(xy.xwzw().xy(), uv_rect.zw()); + vertices[3].Set(xy.zw(), uv_rect.zyzw().xy()); + break; + case DisplayRotation::Rotate180: + vertices[0].Set(xy.xy(), uv_rect.xwzw().xy()); + vertices[1].Set(xy.zyzw().xy(), uv_rect.zw()); + vertices[2].Set(xy.xwzw().xy(), uv_rect.xy()); + vertices[3].Set(xy.zw(), uv_rect.zyzw().xy()); + break; + case DisplayRotation::Rotate270: + vertices[0].Set(xy.xy(), uv_rect.zyzw().xy()); + vertices[1].Set(xy.zyzw().xy(), uv_rect.zw()); + vertices[2].Set(xy.xwzw().xy(), uv_rect.xy()); + vertices[3].Set(xy.zw(), uv_rect.xwzw().xy()); + break; + + DefaultCaseIsUnreachable(); } - g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewportAndScissor(real_draw_rect); - g_gpu_device->Draw(3, 0); + g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4); + g_gpu_device->Draw(4, base_vertex); } if (really_postfx) diff --git a/src/core/gpu_shadergen.cpp b/src/core/gpu_shadergen.cpp index 4e275d4ab..9e3db1d02 100644 --- a/src/core/gpu_shadergen.cpp +++ b/src/core/gpu_shadergen.cpp @@ -12,11 +12,7 @@ GPUShaderGen::~GPUShaderGen() = default; void GPUShaderGen::WriteDisplayUniformBuffer(std::stringstream& ss) const { - // Rotation matrix split into rows to avoid padding in HLSL. - DeclareUniformBuffer(ss, - {"float4 u_src_rect", "float4 u_src_size", "float4 u_clamp_rect", "float4 u_params", - "float2 u_rotation_matrix0", "float2 u_rotation_matrix1"}, - true); + DeclareUniformBuffer(ss, {"float4 u_src_size", "float4 u_clamp_rect", "float4 u_params"}, true); ss << R"( float2 ClampUV(float2 uv) { @@ -29,16 +25,13 @@ std::string GPUShaderGen::GenerateDisplayVertexShader() const std::stringstream ss; WriteHeader(ss); WriteDisplayUniformBuffer(ss); - DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); + DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false); ss << R"( { - float2 pos = float2(float((v_id << 1) & 2u), float(v_id & 2u)); - v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; - v_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); - - // Avoid HLSL/GLSL constructor differences by explicitly multiplying the matrix. - v_pos.xy = float2(dot(u_rotation_matrix0, v_pos.xy), dot(u_rotation_matrix1, v_pos.xy)); + v_pos = float4(a_pos, 0.0f, 1.0f); + v_tex0 = a_tex0; + // NDC space Y flip in Vulkan. #if API_VULKAN v_pos.y = -v_pos.y; #endif diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 1719cb047..6713855c6 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -5,4 +5,4 @@ #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 25; +static constexpr u32 SHADER_CACHE_VERSION = 26;