From 23d98e9352f01e3a85914b7bb2d25c2265c712c0 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sun, 17 Mar 2024 16:59:34 +0000 Subject: [PATCH] GS/HW: Optimize RTA correction to reduce copies --- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/Common/GSDevice.cpp | 2 +- pcsx2/GS/Renderers/Common/GSDevice.h | 2 +- pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 4 +- pcsx2/GS/Renderers/DX11/GSDevice11.h | 2 +- pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 46 ++++++++++++++++----- pcsx2/GS/Renderers/DX12/GSDevice12.h | 4 +- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 5 +-- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 20 +++++++-- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 50 ++++++++++++++--------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- pcsx2/GS/Renderers/Metal/GSDeviceMTL.h | 4 +- pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 9 ++-- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 4 +- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h | 2 +- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 50 ++++++++++++++++++----- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 4 +- 17 files changed, 143 insertions(+), 68 deletions(-) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 6b209b75d4..7496a27726 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -214,6 +214,7 @@ public: bool m_texflush_flag = false; bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; + bool m_can_correct_alpha = false; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index b94eb29371..21ab735332 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -582,7 +582,7 @@ void GSDevice::DrawMultiStretchRects( for (u32 i = 0; i < num_rects; i++) { const MultiStretchRect& sr = rects[i]; - pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf); + pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); if (rects[0].wmask.wrgba != 0xf) { g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, rects[0].wmask.wr, diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index a0fa004746..4a9050aaf0 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -916,7 +916,7 @@ public: virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) = 0; virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) = 0; - virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) = 0; + virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) = 0; void StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true); diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 016cafe5b7..f575e18ba0 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1242,11 +1242,11 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* StretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), linear); } -void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) +void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader) { const u8 index = static_cast(red) | (static_cast(green) << 1) | (static_cast(blue) << 2) | (static_cast(alpha) << 3); - StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast(ShaderConvert::COPY)].get(), nullptr, + StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast(shader)].get(), nullptr, m_convert.bs[index].get(), false); } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.h b/pcsx2/GS/Renderers/DX11/GSDevice11.h index 790ba68472..78e97c3e2a 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.h +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.h @@ -314,7 +314,7 @@ public: void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override; + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true); void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 206a6a8046..00e1b4ce7a 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -1394,7 +1394,7 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* } void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, - bool green, bool blue, bool alpha) + bool green, bool blue, bool alpha, ShaderConvert shader) { GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha); @@ -1569,8 +1569,9 @@ void GSDevice12::DoMultiStretchRects( SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu); - pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf); - SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba].get() : + pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); + int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; + SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba + rta_offset].get() : m_convert[static_cast(shader)].get()); if (ApplyUtilityState()) @@ -2448,17 +2449,42 @@ bool GSDevice12::CompileConvertPipelines() // compile color copy pipelines gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); - for (u32 i = 0; i < 16; i++) + for (u32 j = 0; j < 16; j++) { - pxAssert(!m_color_copy[i]); + pxAssert(!m_color_copy[j]); gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, - D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast(i)); - m_color_copy[i] = gpb.Create(m_device.get(), m_shader_cache, false); - if (!m_color_copy[i]) + D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast(j)); + m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false); + if (!m_color_copy[j]) return false; - D3D12::SetObjectName(m_color_copy[i].get(), TinyString::from_fmt("Color copy pipeline (r={}, g={}, b={}, a={})", - i & 1u, (i >> 1) & 1u, (i >> 2) & 1u, (i >> 3) & 1u)); + D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_fmt("Color copy pipeline (r={}, g={}, b={}, a={})", + j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u)); + } + } + else if (i == ShaderConvert::RTA_CORRECTION) + { + // compile color copy pipelines + gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); + + ComPtr ps(GetUtilityPixelShader(*shader, shaderName(i))); + if (!ps) + return false; + + gpb.SetPixelShader(ps.get()); + + for (u32 j = 16; j < 32; j++) + { + pxAssert(!m_color_copy[j]); + gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, + D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast(j - 16)); + m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false); + if (!m_color_copy[j]) + return false; + + D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_fmt("Color copy pipeline (r={}, g={}, b={}, a={})", + j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u)); } } else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE) diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index f0104ed90f..ce21b8df1f 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -312,7 +312,7 @@ private: std::array, static_cast(ShaderConvert::Count)> m_convert{}; std::array, static_cast(PresentShader::Count)> m_present{}; - std::array, 16> m_color_copy{}; + std::array, 32> m_color_copy{}; std::array, 2> m_merge{}; std::array, NUM_INTERLACE_SHADERS> m_interlace{}; std::array, 2> m_hdr_setup_pipelines{}; // [depth] @@ -432,7 +432,7 @@ public: void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, - bool green, bool blue, bool alpha) override; + bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override; void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; void UpdateCLUTTexture( diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 208ae102a8..853be4f7f1 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -681,9 +681,6 @@ bool GSHwHack::GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip) if (RFBMSK != 0x00FFFFFFu) { GL_PUSH("GSC_PolyphonyDigitalGames(): HLE Gran Turismo RGB channel shuffle"); - - src->m_alpha_max = 255; - src->m_alpha_min = 0; GSHWDrawConfig& config = r.BeginHLEHardwareDraw( src->GetTexture(), nullptr, src->GetScale(), src->GetTexture(), src->GetScale(), src->GetUnscaledRect()); config.pal = palette->GetPaletteGSTexture(); @@ -732,7 +729,7 @@ bool GSHwHack::GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip) // Need the alpha channel. dst->m_TEX0.PSM = PSMCT32; - + dst->m_rt_alpha_scale = false; // Alpha is unknown, since it comes from RGB. dst->m_alpha_min = 0; dst->m_alpha_max = 255; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 9b31903cde..7e487b509a 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2924,7 +2924,11 @@ void GSRendererHW::Draw() if (rt) { if (m_texture_shuffle || m_channel_shuffle || (!rt->m_dirty.empty() && !rt->m_dirty.GetTotalRect(rt->m_TEX0, rt->m_unscaled_size).rintersect(m_r).rempty())) - rt->Update(); + { + const u32 alpha = m_cached_ctx.FRAME.FBMSK >> 24; + const u32 alpha_mask = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk >> 24; + rt->Update(m_texture_shuffle || m_channel_shuffle || (alpha != 0 && (alpha & alpha_mask) != alpha_mask) || (!alpha && GetAlphaMinMax().max > 128)); + } else rt->m_age = 0; } @@ -4296,8 +4300,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_b = 0; m_conf.ps.blend_d = 0; - const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (std::max(rt_alpha_max, rt->m_alpha_max) > 128) || m_conf.ps.fbmask || m_conf.ps.tex_is_fb; - const bool rta_correction = !rta_decorrection && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX); + const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX); if (rta_correction) { rt->RTACorrect(rt); @@ -5303,9 +5306,11 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta // If we Correct/Decorrect and tex is rt, we will need to update the texture reference const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture; + m_can_correct_alpha = true; + if (rt) { - const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || std::max(blend_alpha_max, rt->m_alpha_max) > 128 || m_conf.ps.fbmask || m_conf.ps.tex_is_fb; + const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || ((m_conf.colormask.wrgba & 0x8) && (std::max(blend_alpha_max, rt->m_alpha_max) > 128) || (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0)); if (rta_decorrection) { @@ -5313,6 +5318,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { if (m_conf.ps.read_ba) { + m_can_correct_alpha = false; + rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; @@ -5323,10 +5330,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { if (!(m_cached_ctx.FRAME.FBMSK & 0xFFFC0000)) { + m_can_correct_alpha = false; rt->m_rt_alpha_scale = false; } else if (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000) { + m_can_correct_alpha = false; rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; @@ -5339,6 +5348,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || ((m_cached_ctx.FRAME.FBMSK & 0xFF000000) != 0xFF000000)) { + m_can_correct_alpha = false; rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; @@ -5348,10 +5358,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta } else if (rt->m_last_draw == s_n) { + m_can_correct_alpha = false; rt->m_rt_alpha_scale = false; } else { + m_can_correct_alpha = false; rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 9ad5ec3c84..b0b1c25e7c 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2357,6 +2357,8 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe if (dst->m_dirty.empty()) dst->m_rt_alpha_scale = true; + else + dst->m_last_draw -= 1; // If we preload and it needs to decorrect and we couldn't catch it early, we need to make sure it decorrects the data. pxAssert(dst && dst->m_texture && dst->m_scale == scale); return dst; @@ -2662,9 +2664,12 @@ void GSTextureCache::Target::RTACorrect(Target* rt) if (rt && !rt->m_rt_alpha_scale && rt->m_type == RenderTarget) { const GSVector2i rtsize(rt->m_texture->GetSize()); - if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false)) + const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale)); + + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect))) { - const GSVector4 dRect(rt->m_texture->GetRect()); + // Only copy up the valid area, since there's no point in "correcting" nothing. + const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect)); const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_CORRECTION, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -2680,9 +2685,12 @@ void GSTextureCache::Target::RTADecorrect(Target* rt) if (rt->m_rt_alpha_scale && rt->m_type == RenderTarget) { const GSVector2i rtsize(rt->m_texture->GetSize()); - if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false)) + const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale)); + + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect))) { - const GSVector4 dRect(rt->m_texture->GetRect()); + // Only copy up the valid area, since there's no point in "correcting" nothing. + const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect)); const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_DECORRECTION, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -3522,8 +3530,6 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r if (exact_bp && !dirty_rect.rintersect(targetr).rempty()) t->Update(); - t->RTADecorrect(t); - Read(t, targetr); // Try to cut down how much we read next, if we can. @@ -5422,7 +5428,11 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r) else { fmt = GSTexture::Format::Color; - ps_shader = ShaderConvert::COPY; + if (t->m_rt_alpha_scale) + ps_shader = ShaderConvert::RTA_DECORRECTION; + else + ps_shader = ShaderConvert::COPY; + dltex = &m_color_download_texture; } } @@ -5472,7 +5482,7 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r) const GSVector4 src(GSVector4(r) * GSVector4(t->m_scale) / GSVector4(t->m_texture->GetSize()).xyxy()); const GSVector4i drc(0, 0, r.width(), r.height()); - const bool direct_read = (t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY); + const bool direct_read = t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY; if (!PrepareDownloadTexture(drc.z, drc.w, fmt, dltex)) return; @@ -5950,7 +5960,7 @@ GSTextureCache::Target::~Target() #endif } -void GSTextureCache::Target::Update() +void GSTextureCache::Target::Update(bool cannot_scale) { m_age = 0; @@ -5981,15 +5991,6 @@ void GSTextureCache::Target::Update() return; } - if (m_dirty.size() != 1 || !total_rect.eq(m_valid) && (m_dirty.GetDirtyChannels() & 0x8)) - { - this->RTADecorrect(this); - } - else - { - m_rt_alpha_scale = false; - } - const GSVector4i t_offset(total_rect.xyxy()); const GSVector4i t_size(total_rect - t_offset); const GSVector4 t_sizef(t_size.zwzw()); @@ -6006,7 +6007,7 @@ void GSTextureCache::Target::Update() const bool mapped = t->Map(m); GIFRegTEXA TEXA = {}; - TEXA.AEM = 1; + TEXA.AEM = 0; TEXA.TA0 = 0; TEXA.TA1 = 0x80; @@ -6097,6 +6098,14 @@ void GSTextureCache::Target::Update() if (ndrects > 0) { + if (m_type == RenderTarget && transferring_alpha && bpp >= 16) + { + if (alpha_minmax.second > 128 || (m_TEX0.PSM & 0xf) == PSMCT24) + this->RTADecorrect(this); + else if (!cannot_scale && total_rect.eq(m_valid)) + m_rt_alpha_scale = true; + } + ShaderConvert depth_shader = upscaled ? ShaderConvert::RGBA8_TO_FLOAT32_BILN : ShaderConvert::RGBA8_TO_FLOAT32; if (m_type == DepthStencil && GSLocalMemory::m_psm[m_TEX0.PSM].trbpp != 32) { @@ -6113,8 +6122,9 @@ void GSTextureCache::Target::Update() } } + const ShaderConvert rt_shader = m_rt_alpha_scale ? ShaderConvert::RTA_CORRECTION : ShaderConvert::COPY; // No need to sort here, it's all the one texture. - const ShaderConvert shader = (m_type == RenderTarget) ? ShaderConvert::COPY : depth_shader; + const ShaderConvert shader = (m_type == RenderTarget) ? rt_shader : depth_shader; g_gs_device->DrawMultiStretchRects(drects, ndrects, m_texture, shader); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 332192cf97..e36f692775 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -244,7 +244,7 @@ public: void RTACorrect(Target* rt); void RTADecorrect(Target* rt); - void Update(); + void Update(bool cannot_scale = false); /// Updates the target, if the dirty area intersects with the specified rectangle. void UpdateIfDirtyIntersects(const GSVector4i& rc); diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h index 02480d4385..a34bc51a74 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h @@ -244,7 +244,7 @@ public: MRCOwned> m_cas_pipeline[2]; MRCOwned> m_convert_pipeline[static_cast(ShaderConvert::Count)]; MRCOwned> m_present_pipeline[static_cast(PresentShader::Count)]; - MRCOwned> m_convert_pipeline_copy_mask[1 << 4]; + MRCOwned> m_convert_pipeline_copy_mask[1 << 5]; MRCOwned> m_merge_pipeline[4]; MRCOwned> m_interlace_pipeline[NUM_INTERLACE_SHADERS]; MRCOwned> m_datm_pipeline[4]; @@ -407,7 +407,7 @@ public: /// Copy from a position in sTex to the same position in the currently active render encoder using the given fs pipeline and rect void RenderCopy(GSTexture* sTex, id pipeline, const GSVector4i& rect); void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override; + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override; void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override; void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 63a286403d..23ab9cb7b7 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1033,6 +1033,7 @@ bool GSDeviceMTL::Create() auto vs_convert = LoadShader(@"vs_convert"); auto fs_triangle = LoadShader(@"fs_triangle"); auto ps_copy = LoadShader(@"ps_copy"); + auto ps_copy_rta_correct = LoadShader(@"ps_rta_correction"); auto pdesc = [[MTLRenderPipelineDescriptor new] autorelease]; // FS Triangle Pipelines pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); @@ -1153,7 +1154,7 @@ bool GSDeviceMTL::Create() if (i & 8) mask |= MTLColorWriteMaskAlpha; NSString* name = [NSString stringWithFormat:@"copy_%s%s%s%s", i & 1 ? "r" : "", i & 2 ? "g" : "", i & 4 ? "b" : "", i & 8 ? "a" : ""]; pdesc.colorAttachments[0].writeMask = mask; - m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, ps_copy, name); + m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, (i >= 16) ? ps_copy_rta_correct : ps_copy, name); } pdesc.colorAttachments[0].blendingEnabled = YES; @@ -1572,7 +1573,7 @@ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture DoStretchRect(sTex, sRect, dTex, dRect, pipeline, linear, load_action, nullptr, 0); }} -void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) +void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader) { @autoreleasepool { int sel = 0; if (red) sel |= 1; @@ -1580,7 +1581,7 @@ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture if (blue) sel |= 4; if (alpha) sel |= 8; - id pipeline = m_convert_pipeline_copy_mask[sel]; + id pipeline = m_convert_pipeline_copy_mask[(shader == ShaderConvert::RTA_CORRECTION) ? (sel + 16) : sel]; DoStretchRect(sTex, sRect, dTex, dRect, pipeline, false, sel == 15 ? LoadAction::DontCareIfFull : LoadAction::Load, nullptr, 0); }} @@ -1642,7 +1643,7 @@ void GSDeviceMTL::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_r const u32 vertex_count = end - start; const u32 index_count = vertex_count + (vertex_count >> 1); // 6 indices per 4 vertices id new_pipeline = wmask == 0xf ? m_convert_pipeline[static_cast(shader)] - : m_convert_pipeline_copy_mask[wmask]; + : m_convert_pipeline_copy_mask[(shader == ShaderConvert::RTA_CORRECTION) ? (wmask + 16) : wmask]; if (new_pipeline != pipeline) { pipeline = new_pipeline; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 06ee84a16c..1449f793c4 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1454,7 +1454,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture StretchRect(sTex, sRect, dTex, dRect, ps, false, OMColorMaskSelector(), linear); } -void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) +void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader) { OMColorMaskSelector cms; @@ -1463,7 +1463,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture cms.wb = blue; cms.wa = alpha; - StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)ShaderConvert::COPY], false, cms, false); + StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], false, cms, false); } void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear) diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index 2106c49bcd..3126107052 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -319,7 +319,7 @@ public: void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool linear = true); - void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override; + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true); void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override; diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 736b94a57e..96c107d97a 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -2917,13 +2917,14 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* } void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, - bool green, bool blue, bool alpha) + bool green, bool blue, bool alpha, ShaderConvert shader) { GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha); const u32 index = (red ? 1 : 0) | (green ? 2 : 0) | (blue ? 4 : 0) | (alpha ? 8 : 0); const bool allow_discard = (index == 0xf); - DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, m_color_copy[index], + int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; + DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, m_color_copy[index + rta_offset], false, allow_discard); } @@ -3046,9 +3047,10 @@ void GSDeviceVK::DoMultiStretchRects( BeginRenderPassForStretchRect(dTex, rc, rc, false); SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler : m_point_sampler); - pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf); + pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); + int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; SetPipeline( - (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba] : m_convert[static_cast(shader)]); + (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba + rta_offset] : m_convert[static_cast(shader)]); if (ApplyUtilityState()) DrawIndexedPrimitive(); @@ -4018,18 +4020,44 @@ bool GSDeviceVK::CompileConvertPipelines() { // compile color copy pipelines gpb.SetRenderPass(m_utility_color_render_pass_discard, 0); - for (u32 i = 0; i < 16; i++) + for (u32 j = 0; j < 16; j++) { - pxAssert(!m_color_copy[i]); + pxAssert(!m_color_copy[j]); gpb.ClearBlendAttachments(); gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast(i)); - m_color_copy[i] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); - if (!m_color_copy[i]) + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast(j)); + m_color_copy[j] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_color_copy[j]) return false; - Vulkan::SetObjectName(m_device, m_color_copy[i], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", i & 1u, - (i >> 1) & 1u, (i >> 2) & 1u, (i >> 3) & 1u); + Vulkan::SetObjectName(m_device, m_color_copy[j], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", j & 1u, + (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u); + } + } + else if (i == ShaderConvert::RTA_CORRECTION) + { + // compile color copy pipelines + gpb.SetRenderPass(m_utility_color_render_pass_discard, 0); + VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i)); + if (ps == VK_NULL_HANDLE) + return false; + + ScopedGuard ps_guard([this, &ps]() { vkDestroyShaderModule(m_device, ps, nullptr); }); + gpb.SetFragmentShader(ps); + + for (u32 j = 16; j < 32; j++) + { + pxAssert(!m_color_copy[j]); + gpb.ClearBlendAttachments(); + + gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast(j - 16)); + m_color_copy[j] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_color_copy[j]) + return false; + + Vulkan::SetObjectName(m_device, m_color_copy[j], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", j & 1u, + (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u); } } else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 1982d5c42b..d561010744 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -416,7 +416,7 @@ private: std::array(ShaderConvert::Count)> m_convert{}; std::array(PresentShader::Count)> m_present{}; - std::array m_color_copy{}; + std::array m_color_copy{}; std::array m_merge{}; std::array m_interlace{}; VkPipeline m_hdr_setup_pipelines[2][2] = {}; // [depth][feedback_loop] @@ -551,7 +551,7 @@ public: void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, - bool green, bool blue, bool alpha) override; + bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override; void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override; void DrawMultiStretchRects(