From 229cf908b7b17f00a9e8ca69d8698148c3218ba6 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 2 Mar 2023 21:35:17 +1000 Subject: [PATCH] GS/HW: Use multi stretch for preloading --- pcsx2/GS/Renderers/Common/GSDevice.cpp | 14 ++++- pcsx2/GS/Renderers/Common/GSDevice.h | 3 +- pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 14 +++-- pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 12 +++- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 69 ++++++++++++----------- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 5 +- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 11 +++- 7 files changed, 81 insertions(+), 47 deletions(-) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 7708bdbeab..ef9ca6ff09 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -264,12 +264,22 @@ void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dR StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear); } -void GSDevice::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) +void GSDevice::DrawMultiStretchRects( + const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) { for (u32 i = 0; i < num_rects; i++) { const MultiStretchRect& sr = rects[i]; - g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, shader, sr.linear); + pxAssert(shader == ShaderConvert::COPY || sr.wmask.wrgba == 0xf); + if (sr.wmask.wrgba != 0xf) + { + g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, sr.wmask.wr, + sr.wmask.wg, sr.wmask.wb, sr.wmask.wa); + } + else + { + g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, shader, sr.linear); + } } } diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index aa948012ca..2e1e10193b 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -712,10
+712,11 @@ public: struct MultiStretchRect { - GSTexture* src; GSVector4 src_rect; GSVector4 dst_rect; + GSTexture* src; bool linear; + GSHWDrawConfig::ColorMaskSelector wmask; // 0xf for all channels by default }; enum BlendFactor : u8 diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 1ba2219243..cb3e9895d3 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -793,23 +793,25 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re { IASetInputLayout(m_convert.il.get()); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + VSSetShader(m_convert.vs.get(), nullptr); GSSetShader(nullptr, nullptr); PSSetShader(m_convert.ps[static_cast(shader)].get(), nullptr); - OMSetDepthStencilState(m_convert.dss.get(), 0); - OMSetBlendState(m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), 0.0f); - OMSetRenderTargets(dTex, nullptr); + + OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0); + OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? 
dTex : nullptr); const GSVector2 ds(static_cast(dTex->GetWidth()), static_cast(dTex->GetHeight())); GSTexture* last_tex = rects[0].src; bool last_linear = rects[0].linear; + u8 last_wmask = rects[0].wmask.wrgba; u32 first = 0; u32 count = 1; for (u32 i = 1; i < num_rects; i++) { - if (rects[i].src == last_tex && rects[i].linear == last_linear) + if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask) { count++; continue; @@ -818,6 +820,7 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re DoMultiStretchRects(rects + first, count, ds); last_tex = rects[i].src; last_linear = rects[i].linear; + last_wmask = rects[i].wmask.wrgba; first += count; count = 1; } @@ -864,6 +867,9 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect PSSetShaderResource(0, rects[0].src); PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get()); + + OMSetBlendState(m_convert.bs[rects[0].wmask.wrgba].get(), 0.0f); + DrawIndexedPrimitive(); } diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 03e48d6767..d963bfd097 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -483,6 +483,7 @@ void GSDevice12::DrawMultiStretchRects( { GSTexture* last_tex = rects[0].src; bool last_linear = rects[0].linear; + u8 last_wmask = rects[0].wmask.wrgba; u32 first = 0; u32 count = 1; @@ -502,7 +503,7 @@ void GSDevice12::DrawMultiStretchRects( for (u32 i = 1; i < num_rects; i++) { - if (rects[i].src == last_tex && rects[i].linear == last_linear) + if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask) { count++; continue; @@ -511,6 +512,7 @@ void GSDevice12::DrawMultiStretchRects( DoMultiStretchRects(rects + first, count, static_cast(dTex), shader); last_tex = rects[i].src; last_linear = rects[i].linear; + last_wmask = rects[i].wmask.wrgba; first +=
count; count = 1; } @@ -579,12 +581,16 @@ void GSDevice12::DoMultiStretchRects( // Even though we're batching, a cmdbuffer submit could've messed this up. const GSVector4i rc(dTex->GetRect()); - OMSetRenderTargets(dTex, nullptr, rc); + OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr, rc); if (!InRenderPass()) BeginRenderPassForStretchRect(dTex, rc, rc, false); SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu); - SetPipeline(m_convert[static_cast(shader)].get()); + + pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf); + SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba].get() : + m_convert[static_cast(shader)].get()); + if (ApplyUtilityState()) DrawIndexedPrimitive(); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 24957a0b69..a03fc4cd86 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2648,8 +2648,8 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR // Upload texture -> render target. const bool linear = (scale.x != 1.0f); - copy_queue[copy_count++] = {lmtex, GSVector4(rect) / GSVector4(lmtex->GetSize()).xyxy(), - GSVector4(rect) * GSVector4(scale).xyxy(), linear}; + copy_queue[copy_count++] = {GSVector4(rect) / GSVector4(lmtex->GetSize()).xyxy(), + GSVector4(rect) * GSVector4(scale).xyxy(), lmtex, linear, 0xf}; }; // The idea: loop through pages that this texture covers, find targets which overlap, and copy them in. 
@@ -2769,13 +2769,13 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR } GL_INS(" Copy from %d,%d -> %d,%d (%dx%d)", src_x, src_y, dst_x, dst_y, copy_width, copy_height); - copy_queue[copy_count++] = {t->m_texture, + copy_queue[copy_count++] = { (GSVector4(src_x, src_y, src_x + copy_width, src_y + copy_height) * GSVector4(t->m_texture->GetScale()).xyxy()) / GSVector4(t->m_texture->GetSize()).xyxy(), GSVector4(dst_x, dst_y, dst_x + copy_width, dst_y + copy_height) * GSVector4(scale).xyxy(), - linear}; + t->m_texture, linear, 0xf}; } row_page++; @@ -3644,6 +3644,15 @@ void GSTextureCache::Target::Update(bool reset_age) TEXA.TA0 = 0; TEXA.TA1 = 0x80; + // Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy. + // It's needed for depth, though.. filtering depth doesn't make much sense, but SMT3 needs it.. + const bool upscaled = (m_texture->GetScale().x > 1.0f); + const bool linear = (m_type == RenderTarget && upscaled); + + GSDevice::MultiStretchRect* drects = static_cast( + alloca(sizeof(GSDevice::MultiStretchRect) * static_cast(m_dirty.size()))); + u32 ndrects = 0; + const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM)); for (size_t i = 0; i < m_dirty.size(); i++) { @@ -3664,41 +3673,35 @@ void GSTextureCache::Target::Update(bool reset_age) t->Update(t_r, s_unswizzle_buffer, pitch); } + + GSDevice::MultiStretchRect& drect = drects[ndrects++]; + drect.src = t; + drect.src_rect = GSVector4(r - t_offset) / t_sizef; + drect.dst_rect = GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(); + drect.linear = linear; + // Copy the new GS memory content into the destination texture. 
+ if (m_type == RenderTarget) + { + GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w); + drect.wmask = static_cast(m_dirty[i].rgba._u32); + } + else if (m_type == DepthStencil) + { + GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0); + drect.wmask = 0xF; + } } if (mapped) t->Unmap(); - for (size_t i = 0; i < m_dirty.size(); i++) + if (ndrects > 0) { - const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect)); - - if (r.rempty()) - continue; - - const GSVector4 sRect(GSVector4(r - t_offset) / t_sizef); - const GSVector4 dRect(GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); - // Copy the new GS memory content into the destination texture. - if (m_type == RenderTarget) - { - GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w); - if (m_dirty[i].rgba._u32 != 0xf) - { - g_gs_device->StretchRect(t, sRect, m_texture, dRect, m_dirty[i].rgba.c.r, m_dirty[i].rgba.c.g, m_dirty[i].rgba.c.b, m_dirty[i].rgba.c.a); - } - else - { - // Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy. - g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::COPY, g_gs_renderer->CanUpscale()); - } - } - else if (m_type == DepthStencil) - { - GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0); - - // FIXME linear or not? - g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::RGBA8_TO_FLOAT32_BILN); - } + // No need to sort here, it's all the one texture. + const ShaderConvert shader = (m_type == RenderTarget) ? ShaderConvert::COPY : + (upscaled ? 
ShaderConvert::RGBA8_TO_FLOAT32_BILN : + ShaderConvert::RGBA8_TO_FLOAT32); + g_gs_device->DrawMultiStretchRects(drects, ndrects, m_texture, shader); } UpdateValidity(total_rect); diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 4f3e6506fa..1565950447 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1323,13 +1323,14 @@ void GSDeviceOGL::DrawMultiStretchRects( const GSVector2 ds(static_cast(dTex->GetWidth()), static_cast(dTex->GetHeight())); GSTexture* last_tex = rects[0].src; bool last_linear = rects[0].linear; + u8 last_wmask = rects[0].wmask.wrgba; u32 first = 0; u32 count = 1; for (u32 i = 1; i < num_rects; i++) { - if (rects[i].src == last_tex && rects[i].linear == last_linear) + if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask) { count++; continue; @@ -1338,6 +1339,7 @@ void GSDeviceOGL::DrawMultiStretchRects( DoMultiStretchRects(rects + first, count, ds); last_tex = rects[i].src; last_linear = rects[i].linear; + last_wmask = rects[i].wmask.wrgba; first += count; count = 1; } @@ -1391,6 +1393,7 @@ void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rec PSSetShaderResource(0, rects[0].src); PSSetSamplerState(rects[0].linear ?
m_convert.ln : m_convert.pt); + OMSetColorMaskState(rects[0].wmask); DrawIndexedPrimitive(); } diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 0922ae2701..8e1a82cf4f 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -558,6 +558,7 @@ void GSDeviceVK::DrawMultiStretchRects( { GSTexture* last_tex = rects[0].src; bool last_linear = rects[0].linear; + u8 last_wmask = rects[0].wmask.wrgba; u32 first = 0; u32 count = 1; @@ -577,7 +578,7 @@ void GSDeviceVK::DrawMultiStretchRects( for (u32 i = 1; i < num_rects; i++) { - if (rects[i].src == last_tex && rects[i].linear == last_linear) + if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask) { count++; continue; @@ -586,6 +587,7 @@ void GSDeviceVK::DrawMultiStretchRects( DoMultiStretchRects(rects + first, count, static_cast(dTex), shader); last_tex = rects[i].src; last_linear = rects[i].linear; + last_wmask = rects[i].wmask.wrgba; first += count; count = 1; } @@ -651,11 +653,14 @@ void GSDeviceVK::DoMultiStretchRects( // Even though we're batching, a cmdbuffer submit could've messed this up. const GSVector4i rc(dTex->GetRect()); - OMSetRenderTargets(dTex, nullptr, rc, false); + OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr, rc, false); if (!InRenderPass() || !CheckRenderPassArea(rc)) BeginRenderPassForStretchRect(dTex, rc, rc, false); SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler : m_point_sampler); - SetPipeline(m_convert[static_cast(shader)]); + + pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf); + SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba] : m_convert[static_cast(shader)]); + if (ApplyUtilityState()) DrawIndexedPrimitive(); }