diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp
index 08811fb9c0..7708bdbeab 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.cpp
+++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp
@@ -20,6 +20,8 @@
 #include "Host.h"
 #include "common/StringUtil.h"
 
+#include <algorithm>
+
 const char* shaderName(ShaderConvert value)
 {
 	switch (value)
@@ -262,6 +264,28 @@ void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dR
 	StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear);
 }
 
+void GSDevice::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
+{
+	// Generic implementation: no batching, just one StretchRect call per entry.
+	for (u32 i = 0; i < num_rects; i++)
+	{
+		const MultiStretchRect& sr = rects[i];
+		g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, shader, sr.linear);
+	}
+}
+
+void GSDevice::SortMultiStretchRects(MultiStretchRect* rects, u32 num_rects)
+{
+	// Group by source texture, then by filter, so entries that can share a draw become adjacent.
+	// std::sort requires a strict weak ordering, so `linear` is only a tie-breaker for equal textures.
+	// Depending on num_rects, insertion sort may be better here.
+	std::sort(rects, rects + num_rects, [](const MultiStretchRect& lhs, const MultiStretchRect& rhs) {
+		if (lhs.src != rhs.src)
+			return lhs.src < rhs.src;
+		return lhs.linear < rhs.linear;
+	});
+}
+
 void GSDevice::ClearCurrent()
 {
 	m_current = nullptr;
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h
index bcbc1da1de..aa948012ca 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.h
+++ b/pcsx2/GS/Renderers/Common/GSDevice.h
@@ -710,6 +710,15 @@ public:
 		}
 	};
 
+	/// One source/destination pair for DrawMultiStretchRects().
+	struct MultiStretchRect
+	{
+		GSTexture* src; ///< Texture to read from; entries are batched by this pointer.
+		GSVector4 src_rect; ///< Source rectangle, forwarded as sRect to StretchRect.
+		GSVector4 dst_rect; ///< Destination rectangle, forwarded as dRect to StretchRect.
+		bool linear; ///< Filtering flag forwarded to StretchRect; also part of the batching key.
+	};
+
 	enum BlendFactor : u8
 	{
 		// HW blend factors
@@ -828,6 +837,13 @@ public:
 	/// Performs a screen blit for display. If dTex is null, it assumes you are writing to the system framebuffer/swap chain.
virtual void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) {} + /// Same as doing StretchRect for each item, except tries to batch together rectangles in as few draws as possible. + /// The provided list should be sorted by texture, the implementations only check if it's the same as the last. + virtual void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader = ShaderConvert::COPY); + + /// Sorts a MultiStretchRect list for optimal batching. + static void SortMultiStretchRects(MultiStretchRect* rects, u32 num_rects); + /// Updates a GPU CLUT texture from a source texture. virtual void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) {} diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 39f63ba428..fdcd549919 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -553,11 +553,118 @@ void GSDeviceVK::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* m_present[static_cast(shader)], linear); } -void GSDeviceVK::BeginRenderPassForStretchRect(GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc) +void GSDeviceVK::DrawMultiStretchRects( + const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) +{ + GSTexture* last_tex = rects[0].src; + bool last_linear = rects[0].linear; + + u32 first = 0; + u32 count = 1; + + // Make sure all textures are in shader read only layout, so we don't need to break + // the render pass to transition. 
+ for (u32 i = 0; i < num_rects; i++) + { + GSTextureVK* const stex = static_cast(rects[i].src); + stex->CommitClear(); + if (stex->GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + EndRenderPass(); + stex->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + } + + for (u32 i = 1; i < num_rects; i++) + { + if (rects[i].src == last_tex && rects[i].linear == last_linear) + { + count++; + continue; + } + + DoMultiStretchRects(rects + first, count, static_cast(dTex), shader); + last_tex = rects[i].src; + last_linear = rects[i].linear; + first += count; + count = 0; + } + + DoMultiStretchRects(rects + first, count, static_cast(dTex), shader); +} + +void GSDeviceVK::DoMultiStretchRects( + const MultiStretchRect* rects, u32 num_rects, GSTextureVK* dTex, ShaderConvert shader) +{ + // Set up vertices first. + const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1); + const u32 index_reserve_size = num_rects * 6 * sizeof(u32); + if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) || + !m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32))) + { + ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to vertex buffer"); + if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) || + !m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32))) + { + pxFailRel("Failed to reserve space for vertices"); + } + } + + // Pain in the arse because the primitive topology for the pipelines is all triangle strips. + // Don't use primitive restart here, it ends up slower on some drivers. 
+ const GSVector2 ds(static_cast(dTex->GetWidth()), static_cast(dTex->GetHeight())); + GSVertexPT1* verts = reinterpret_cast(m_vertex_stream_buffer.GetCurrentHostPointer()); + u32* idx = reinterpret_cast(m_index_stream_buffer.GetCurrentHostPointer()); + u32 icount = 0; + u32 vcount = 0; + for (u32 i = 0; i < num_rects; i++) + { + const GSVector4& sRect = rects[i].src_rect; + const GSVector4& dRect = rects[i].dst_rect; + const float left = dRect.x * 2 / ds.x - 1.0f; + const float top = 1.0f - dRect.y * 2 / ds.y; + const float right = dRect.z * 2 / ds.x - 1.0f; + const float bottom = 1.0f - dRect.w * 2 / ds.y; + + const u32 vstart = vcount; + verts[vcount++] = {GSVector4(left, top, 0.5f, 1.0f), GSVector2(sRect.x, sRect.y)}; + verts[vcount++] = {GSVector4(right, top, 0.5f, 1.0f), GSVector2(sRect.z, sRect.y)}; + verts[vcount++] = {GSVector4(left, bottom, 0.5f, 1.0f), GSVector2(sRect.x, sRect.w)}; + verts[vcount++] = {GSVector4(right, bottom, 0.5f, 1.0f), GSVector2(sRect.z, sRect.w)}; + + if (i > 0) + idx[icount++] = vstart; + + idx[icount++] = vstart; + idx[icount++] = vstart + 1; + idx[icount++] = vstart + 2; + idx[icount++] = vstart + 3; + idx[icount++] = vstart + 3; + }; + + m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1); + m_vertex.count = vcount; + m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32); + m_index.count = icount; + m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1)); + m_index_stream_buffer.CommitMemory(icount * sizeof(u32)); + + // Even though we're batching, a cmdbuffer submit could've messed this up. + const GSVector4i rc(dTex->GetRect()); + OMSetRenderTargets(dTex, nullptr, dTex->GetRect(), false); + if (!InRenderPass() || !CheckRenderPassArea(rc)) + BeginRenderPassForStretchRect(dTex, rc, rc, false); + SetUtilityTexture(rects[0].src, rects[0].linear ? 
m_linear_sampler : m_point_sampler); + SetPipeline(m_convert[static_cast(shader)]); + if (ApplyUtilityState()) + DrawIndexedPrimitive(); +} + +void GSDeviceVK::BeginRenderPassForStretchRect( + GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc, bool allow_discard) { - const bool is_whole_target = dst_rc.eq(dtex_rc); const VkAttachmentLoadOp load_op = - is_whole_target ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : GetLoadOpForTexture(dTex); + (allow_discard && dst_rc.eq(dtex_rc)) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : GetLoadOpForTexture(dTex); dTex->SetState(GSTexture::State::Dirty); if (dTex->GetType() == GSTexture::Type::DepthStencil) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 60a825aeca..4785e187c5 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -232,8 +232,11 @@ public: bool green, bool blue, bool alpha) override; void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; + void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override; + void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTextureVK* dTex, ShaderConvert shader); - void BeginRenderPassForStretchRect(GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc); + void BeginRenderPassForStretchRect( + GSTextureVK* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc, bool allow_discard = true); void DoStretchRect(GSTextureVK* sTex, const GSVector4& sRect, GSTextureVK* dTex, const GSVector4& dRect, VkPipeline pipeline, bool linear); void DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds);