GS/HW: Optimize RTA correction to reduce copies

This commit is contained in:
refractionpcsx2 2024-03-17 16:59:34 +00:00
parent 8f381a4e16
commit 23d98e9352
17 changed files with 143 additions and 68 deletions

View File

@ -214,6 +214,7 @@ public:
bool m_texflush_flag = false;
bool m_isPackedUV_HackFlag = false;
bool m_channel_shuffle = false;
bool m_can_correct_alpha = false;
u8 m_scanmask_used = 0;
u32 m_dirty_gs_regs = 0;
int m_backed_up_ctx = 0;

View File

@ -582,7 +582,7 @@ void GSDevice::DrawMultiStretchRects(
for (u32 i = 0; i < num_rects; i++)
{
const MultiStretchRect& sr = rects[i];
pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf);
pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf);
if (rects[0].wmask.wrgba != 0xf)
{
g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, rects[0].wmask.wr,

View File

@ -916,7 +916,7 @@ public:
virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) = 0;
virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) = 0;
virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) = 0;
virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) = 0;
void StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true);

View File

@ -1242,11 +1242,11 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
StretchRect(sTex, sRect, dTex, dRect, ps, ps_cb, m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), linear);
}
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha)
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader)
{
const u8 index = static_cast<u8>(red) | (static_cast<u8>(green) << 1) | (static_cast<u8>(blue) << 2) |
(static_cast<u8>(alpha) << 3);
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast<int>(ShaderConvert::COPY)].get(), nullptr,
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), nullptr,
m_convert.bs[index].get(), false);
}

View File

@ -314,7 +314,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear = true);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;

View File

@ -1394,7 +1394,7 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
}
void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
bool green, bool blue, bool alpha)
bool green, bool blue, bool alpha, ShaderConvert shader)
{
GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha);
@ -1569,8 +1569,9 @@ void GSDevice12::DoMultiStretchRects(
SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu);
pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf);
SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba].get() :
pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf);
int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0;
SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba + rta_offset].get() :
m_convert[static_cast<int>(shader)].get());
if (ApplyUtilityState())
@ -2448,17 +2449,42 @@ bool GSDevice12::CompileConvertPipelines()
// compile color copy pipelines
gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);
for (u32 i = 0; i < 16; i++)
for (u32 j = 0; j < 16; j++)
{
pxAssert(!m_color_copy[i]);
pxAssert(!m_color_copy[j]);
gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast<u8>(i));
m_color_copy[i] = gpb.Create(m_device.get(), m_shader_cache, false);
if (!m_color_copy[i])
D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast<u8>(j));
m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false);
if (!m_color_copy[j])
return false;
D3D12::SetObjectName(m_color_copy[i].get(), TinyString::from_fmt("Color copy pipeline (r={}, g={}, b={}, a={})",
i & 1u, (i >> 1) & 1u, (i >> 2) & 1u, (i >> 3) & 1u));
D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_fmt("Color copy pipeline (r={}, g={}, b={}, a={})",
j & 1u, (j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u));
}
}
else if (i == ShaderConvert::RTA_CORRECTION)
{
	// Compile the write-masked RTA correction pipelines. They occupy the upper half of
	// m_color_copy (slots 16-31): slot (16 + mask) is the RTA_CORRECTION counterpart of the
	// plain copy pipeline in slot (mask), matching the "+ 16" offset DoMultiStretchRects()
	// applies when the shader is RTA_CORRECTION.
	gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM);
	gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN);

	// These pipelines need the RTA correction pixel shader bound; bail out if it is unavailable.
	ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, shaderName(i)));
	if (!ps)
		return false;

	gpb.SetPixelShader(ps.get());

	for (u32 j = 16; j < 32; j++)
	{
		// The colour write mask is the slot index with the RTA offset removed
		// (bit 0 = red, bit 1 = green, bit 2 = blue, bit 3 = alpha).
		const u32 mask = j - 16;

		pxAssert(!m_color_copy[j]);
		gpb.SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE,
			D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, static_cast<u8>(mask));
		m_color_copy[j] = gpb.Create(m_device.get(), m_shader_cache, false);
		if (!m_color_copy[j])
			return false;

		// Use a distinct debug name so these are not confused with the plain copy pipelines in slots 0-15.
		D3D12::SetObjectName(m_color_copy[j].get(), TinyString::from_fmt("RTA correction color copy pipeline (r={}, g={}, b={}, a={})",
			mask & 1u, (mask >> 1) & 1u, (mask >> 2) & 1u, (mask >> 3) & 1u));
	}
}
else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE)

View File

@ -312,7 +312,7 @@ private:
std::array<ComPtr<ID3D12PipelineState>, static_cast<int>(ShaderConvert::Count)> m_convert{};
std::array<ComPtr<ID3D12PipelineState>, static_cast<int>(PresentShader::Count)> m_present{};
std::array<ComPtr<ID3D12PipelineState>, 16> m_color_copy{};
std::array<ComPtr<ID3D12PipelineState>, 32> m_color_copy{};
std::array<ComPtr<ID3D12PipelineState>, 2> m_merge{};
std::array<ComPtr<ID3D12PipelineState>, NUM_INTERLACE_SHADERS> m_interlace{};
std::array<ComPtr<ID3D12PipelineState>, 2> m_hdr_setup_pipelines{}; // [depth]
@ -432,7 +432,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
bool green, bool blue, bool alpha) override;
bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(

View File

@ -681,9 +681,6 @@ bool GSHwHack::GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip)
if (RFBMSK != 0x00FFFFFFu)
{
GL_PUSH("GSC_PolyphonyDigitalGames(): HLE Gran Turismo RGB channel shuffle");
src->m_alpha_max = 255;
src->m_alpha_min = 0;
GSHWDrawConfig& config = r.BeginHLEHardwareDraw(
src->GetTexture(), nullptr, src->GetScale(), src->GetTexture(), src->GetScale(), src->GetUnscaledRect());
config.pal = palette->GetPaletteGSTexture();
@ -732,7 +729,7 @@ bool GSHwHack::GSC_PolyphonyDigitalGames(GSRendererHW& r, int& skip)
// Need the alpha channel.
dst->m_TEX0.PSM = PSMCT32;
dst->m_rt_alpha_scale = false;
// Alpha is unknown, since it comes from RGB.
dst->m_alpha_min = 0;
dst->m_alpha_max = 255;

View File

@ -2924,7 +2924,11 @@ void GSRendererHW::Draw()
if (rt)
{
if (m_texture_shuffle || m_channel_shuffle || (!rt->m_dirty.empty() && !rt->m_dirty.GetTotalRect(rt->m_TEX0, rt->m_unscaled_size).rintersect(m_r).rempty()))
rt->Update();
{
const u32 alpha = m_cached_ctx.FRAME.FBMSK >> 24;
const u32 alpha_mask = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk >> 24;
rt->Update(m_texture_shuffle || m_channel_shuffle || (alpha != 0 && (alpha & alpha_mask) != alpha_mask) || (!alpha && GetAlphaMinMax().max > 128));
}
else
rt->m_age = 0;
}
@ -4296,8 +4300,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_b = 0;
m_conf.ps.blend_d = 0;
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || (std::max(rt_alpha_max, rt->m_alpha_max) > 128) || m_conf.ps.fbmask || m_conf.ps.tex_is_fb;
const bool rta_correction = !rta_decorrection && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX);
const bool rta_correction = m_can_correct_alpha && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1 && !(blend_flag & BLEND_A_MAX);
if (rta_correction)
{
rt->RTACorrect(rt);
@ -5303,9 +5306,11 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
m_can_correct_alpha = true;
if (rt)
{
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || std::max(blend_alpha_max, rt->m_alpha_max) > 128 || m_conf.ps.fbmask || m_conf.ps.tex_is_fb;
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || ((m_conf.colormask.wrgba & 0x8) && (std::max(blend_alpha_max, rt->m_alpha_max) > 128) || (m_conf.ps.fbmask && m_conf.cb_ps.FbMask.a != 0xFF && m_conf.cb_ps.FbMask.a != 0));
if (rta_decorrection)
{
@ -5313,6 +5318,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
if (m_conf.ps.read_ba)
{
m_can_correct_alpha = false;
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
@ -5323,10 +5330,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
if (!(m_cached_ctx.FRAME.FBMSK & 0xFFFC0000))
{
m_can_correct_alpha = false;
rt->m_rt_alpha_scale = false;
}
else if (m_cached_ctx.FRAME.FBMSK & 0xFFFC0000)
{
m_can_correct_alpha = false;
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
@ -5339,6 +5348,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
if (m_conf.ps.tales_of_abyss_hle || (tex && tex->m_from_target && tex->m_from_target == rt && m_conf.ps.channel == ChannelFetch_ALPHA) || ((m_cached_ctx.FRAME.FBMSK & 0xFF000000) != 0xFF000000))
{
m_can_correct_alpha = false;
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
@ -5348,10 +5358,12 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
}
else if (rt->m_last_draw == s_n)
{
m_can_correct_alpha = false;
rt->m_rt_alpha_scale = false;
}
else
{
m_can_correct_alpha = false;
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;

View File

@ -2357,6 +2357,8 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
if (dst->m_dirty.empty())
dst->m_rt_alpha_scale = true;
else
dst->m_last_draw -= 1; // If we preload and it needs to decorrect and we couldn't catch it early, we need to make sure it decorrects the data.
pxAssert(dst && dst->m_texture && dst->m_scale == scale);
return dst;
@ -2662,9 +2664,12 @@ void GSTextureCache::Target::RTACorrect(Target* rt)
if (rt && !rt->m_rt_alpha_scale && rt->m_type == RenderTarget)
{
const GSVector2i rtsize(rt->m_texture->GetSize());
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false))
const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
{
const GSVector4 dRect(rt->m_texture->GetRect());
// Only copy up the valid area, since there's no point in "correcting" nothing.
const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect));
const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_CORRECTION, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
@ -2680,9 +2685,12 @@ void GSTextureCache::Target::RTADecorrect(Target* rt)
if (rt->m_rt_alpha_scale && rt->m_type == RenderTarget)
{
const GSVector2i rtsize(rt->m_texture->GetSize());
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false))
const GSVector4i valid_rect = GSVector4i(GSVector4(rt->m_valid) * GSVector4(rt->m_scale));
if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect)))
{
const GSVector4 dRect(rt->m_texture->GetRect());
// Only copy up the valid area, since there's no point in "correcting" nothing.
const GSVector4 dRect(rt->m_texture->GetRect().rintersect(valid_rect));
const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_DECORRECTION, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
@ -3522,8 +3530,6 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
if (exact_bp && !dirty_rect.rintersect(targetr).rempty())
t->Update();
t->RTADecorrect(t);
Read(t, targetr);
// Try to cut down how much we read next, if we can.
@ -5422,7 +5428,11 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
else
{
fmt = GSTexture::Format::Color;
ps_shader = ShaderConvert::COPY;
if (t->m_rt_alpha_scale)
ps_shader = ShaderConvert::RTA_DECORRECTION;
else
ps_shader = ShaderConvert::COPY;
dltex = &m_color_download_texture;
}
}
@ -5472,7 +5482,7 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
const GSVector4 src(GSVector4(r) * GSVector4(t->m_scale) / GSVector4(t->m_texture->GetSize()).xyxy());
const GSVector4i drc(0, 0, r.width(), r.height());
const bool direct_read = (t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY);
const bool direct_read = t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY;
if (!PrepareDownloadTexture(drc.z, drc.w, fmt, dltex))
return;
@ -5950,7 +5960,7 @@ GSTextureCache::Target::~Target()
#endif
}
void GSTextureCache::Target::Update()
void GSTextureCache::Target::Update(bool cannot_scale)
{
m_age = 0;
@ -5981,15 +5991,6 @@ void GSTextureCache::Target::Update()
return;
}
if (m_dirty.size() != 1 || !total_rect.eq(m_valid) && (m_dirty.GetDirtyChannels() & 0x8))
{
this->RTADecorrect(this);
}
else
{
m_rt_alpha_scale = false;
}
const GSVector4i t_offset(total_rect.xyxy());
const GSVector4i t_size(total_rect - t_offset);
const GSVector4 t_sizef(t_size.zwzw());
@ -6006,7 +6007,7 @@ void GSTextureCache::Target::Update()
const bool mapped = t->Map(m);
GIFRegTEXA TEXA = {};
TEXA.AEM = 1;
TEXA.AEM = 0;
TEXA.TA0 = 0;
TEXA.TA1 = 0x80;
@ -6097,6 +6098,14 @@ void GSTextureCache::Target::Update()
if (ndrects > 0)
{
if (m_type == RenderTarget && transferring_alpha && bpp >= 16)
{
if (alpha_minmax.second > 128 || (m_TEX0.PSM & 0xf) == PSMCT24)
this->RTADecorrect(this);
else if (!cannot_scale && total_rect.eq(m_valid))
m_rt_alpha_scale = true;
}
ShaderConvert depth_shader = upscaled ? ShaderConvert::RGBA8_TO_FLOAT32_BILN : ShaderConvert::RGBA8_TO_FLOAT32;
if (m_type == DepthStencil && GSLocalMemory::m_psm[m_TEX0.PSM].trbpp != 32)
{
@ -6113,8 +6122,9 @@ void GSTextureCache::Target::Update()
}
}
const ShaderConvert rt_shader = m_rt_alpha_scale ? ShaderConvert::RTA_CORRECTION : ShaderConvert::COPY;
// No need to sort here, it's all the one texture.
const ShaderConvert shader = (m_type == RenderTarget) ? ShaderConvert::COPY : depth_shader;
const ShaderConvert shader = (m_type == RenderTarget) ? rt_shader : depth_shader;
g_gs_device->DrawMultiStretchRects(drects, ndrects, m_texture, shader);
}

View File

@ -244,7 +244,7 @@ public:
void RTACorrect(Target* rt);
void RTADecorrect(Target* rt);
void Update();
void Update(bool cannot_scale = false);
/// Updates the target, if the dirty area intersects with the specified rectangle.
void UpdateIfDirtyIntersects(const GSVector4i& rc);

View File

@ -244,7 +244,7 @@ public:
MRCOwned<id<MTLComputePipelineState>> m_cas_pipeline[2];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy_mask[1 << 4];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy_mask[1 << 5];
MRCOwned<id<MTLRenderPipelineState>> m_merge_pipeline[4];
MRCOwned<id<MTLRenderPipelineState>> m_interlace_pipeline[NUM_INTERLACE_SHADERS];
MRCOwned<id<MTLRenderPipelineState>> m_datm_pipeline[4];
@ -407,7 +407,7 @@ public:
/// Copy from a position in sTex to the same position in the currently active render encoder using the given fs pipeline and rect
void RenderCopy(GSTexture* sTex, id<MTLRenderPipelineState> pipeline, const GSVector4i& rect);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;

View File

@ -1033,6 +1033,7 @@ bool GSDeviceMTL::Create()
auto vs_convert = LoadShader(@"vs_convert");
auto fs_triangle = LoadShader(@"fs_triangle");
auto ps_copy = LoadShader(@"ps_copy");
auto ps_copy_rta_correct = LoadShader(@"ps_rta_correction");
auto pdesc = [[MTLRenderPipelineDescriptor new] autorelease];
// FS Triangle Pipelines
pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color);
@ -1153,7 +1154,7 @@ bool GSDeviceMTL::Create()
if (i & 8) mask |= MTLColorWriteMaskAlpha;
NSString* name = [NSString stringWithFormat:@"copy_%s%s%s%s", i & 1 ? "r" : "", i & 2 ? "g" : "", i & 4 ? "b" : "", i & 8 ? "a" : ""];
pdesc.colorAttachments[0].writeMask = mask;
m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, ps_copy, name);
m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, (i >= 16) ? ps_copy_rta_correct : ps_copy, name);
}
pdesc.colorAttachments[0].blendingEnabled = YES;
@ -1572,7 +1573,7 @@ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
DoStretchRect(sTex, sRect, dTex, dRect, pipeline, linear, load_action, nullptr, 0);
}}
void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha)
void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader)
{ @autoreleasepool {
int sel = 0;
if (red) sel |= 1;
@ -1580,7 +1581,7 @@ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
if (blue) sel |= 4;
if (alpha) sel |= 8;
id<MTLRenderPipelineState> pipeline = m_convert_pipeline_copy_mask[sel];
id<MTLRenderPipelineState> pipeline = m_convert_pipeline_copy_mask[(shader == ShaderConvert::RTA_CORRECTION) ? (sel + 16) : sel];
DoStretchRect(sTex, sRect, dTex, dRect, pipeline, false, sel == 15 ? LoadAction::DontCareIfFull : LoadAction::Load, nullptr, 0);
}}
@ -1642,7 +1643,7 @@ void GSDeviceMTL::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_r
const u32 vertex_count = end - start;
const u32 index_count = vertex_count + (vertex_count >> 1); // 6 indices per 4 vertices
id<MTLRenderPipelineState> new_pipeline = wmask == 0xf ? m_convert_pipeline[static_cast<int>(shader)]
: m_convert_pipeline_copy_mask[wmask];
: m_convert_pipeline_copy_mask[(shader == ShaderConvert::RTA_CORRECTION) ? (wmask + 16) : wmask];
if (new_pipeline != pipeline)
{
pipeline = new_pipeline;

View File

@ -1454,7 +1454,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
StretchRect(sTex, sRect, dTex, dRect, ps, false, OMColorMaskSelector(), linear);
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha)
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader)
{
OMColorMaskSelector cms;
@ -1463,7 +1463,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
cms.wb = blue;
cms.wa = alpha;
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)ShaderConvert::COPY], false, cms, false);
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], false, cms, false);
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear)

View File

@ -319,7 +319,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool linear = true);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;

View File

@ -2917,13 +2917,14 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
}
void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
bool green, bool blue, bool alpha)
bool green, bool blue, bool alpha, ShaderConvert shader)
{
GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha);
const u32 index = (red ? 1 : 0) | (green ? 2 : 0) | (blue ? 4 : 0) | (alpha ? 8 : 0);
const bool allow_discard = (index == 0xf);
DoStretchRect(static_cast<GSTextureVK*>(sTex), sRect, static_cast<GSTextureVK*>(dTex), dRect, m_color_copy[index],
int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0;
DoStretchRect(static_cast<GSTextureVK*>(sTex), sRect, static_cast<GSTextureVK*>(dTex), dRect, m_color_copy[index + rta_offset],
false, allow_discard);
}
@ -3046,9 +3047,10 @@ void GSDeviceVK::DoMultiStretchRects(
BeginRenderPassForStretchRect(dTex, rc, rc, false);
SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler : m_point_sampler);
pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf);
pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf);
int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0;
SetPipeline(
(rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba] : m_convert[static_cast<int>(shader)]);
(rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba + rta_offset] : m_convert[static_cast<int>(shader)]);
if (ApplyUtilityState())
DrawIndexedPrimitive();
@ -4018,18 +4020,44 @@ bool GSDeviceVK::CompileConvertPipelines()
{
// compile color copy pipelines
gpb.SetRenderPass(m_utility_color_render_pass_discard, 0);
for (u32 i = 0; i < 16; i++)
for (u32 j = 0; j < 16; j++)
{
pxAssert(!m_color_copy[i]);
pxAssert(!m_color_copy[j]);
gpb.ClearBlendAttachments();
gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast<VkColorComponentFlags>(i));
m_color_copy[i] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false);
if (!m_color_copy[i])
VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast<VkColorComponentFlags>(j));
m_color_copy[j] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false);
if (!m_color_copy[j])
return false;
Vulkan::SetObjectName(m_device, m_color_copy[i], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", i & 1u,
(i >> 1) & 1u, (i >> 2) & 1u, (i >> 3) & 1u);
Vulkan::SetObjectName(m_device, m_color_copy[j], "Color copy pipeline (r=%u, g=%u, b=%u, a=%u)", j & 1u,
(j >> 1) & 1u, (j >> 2) & 1u, (j >> 3) & 1u);
}
}
else if (i == ShaderConvert::RTA_CORRECTION)
{
	// Compile the write-masked RTA correction pipelines. They occupy the upper half of
	// m_color_copy (slots 16-31): slot (16 + mask) is the RTA_CORRECTION counterpart of the
	// plain copy pipeline in slot (mask), matching the "+ 16" offset used by StretchRect()
	// and DoMultiStretchRects() when the shader is RTA_CORRECTION.
	gpb.SetRenderPass(m_utility_color_render_pass_discard, 0);

	// These pipelines need the RTA correction fragment shader; bail out if it is unavailable.
	VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i));
	if (ps == VK_NULL_HANDLE)
		return false;

	// Destroy the shader module on every exit path from this branch, including early returns.
	ScopedGuard ps_guard([this, &ps]() { vkDestroyShaderModule(m_device, ps, nullptr); });
	gpb.SetFragmentShader(ps);

	for (u32 j = 16; j < 32; j++)
	{
		// The colour write mask is the slot index with the RTA offset removed
		// (bit 0 = red, bit 1 = green, bit 2 = blue, bit 3 = alpha).
		const u32 mask = j - 16;

		pxAssert(!m_color_copy[j]);
		gpb.ClearBlendAttachments();
		gpb.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
			VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, static_cast<VkColorComponentFlags>(mask));
		m_color_copy[j] = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false);
		if (!m_color_copy[j])
			return false;

		// Use a distinct debug name so these are not confused with the plain copy pipelines in slots 0-15.
		Vulkan::SetObjectName(m_device, m_color_copy[j], "RTA correction color copy pipeline (r=%u, g=%u, b=%u, a=%u)", mask & 1u,
			(mask >> 1) & 1u, (mask >> 2) & 1u, (mask >> 3) & 1u);
	}
}
else if (i == ShaderConvert::HDR_INIT || i == ShaderConvert::HDR_RESOLVE)

View File

@ -416,7 +416,7 @@ private:
std::array<VkPipeline, static_cast<int>(ShaderConvert::Count)> m_convert{};
std::array<VkPipeline, static_cast<int>(PresentShader::Count)> m_present{};
std::array<VkPipeline, 16> m_color_copy{};
std::array<VkPipeline, 32> m_color_copy{};
std::array<VkPipeline, 2> m_merge{};
std::array<VkPipeline, NUM_INTERLACE_SHADERS> m_interlace{};
VkPipeline m_hdr_setup_pipelines[2][2] = {}; // [depth][feedback_loop]
@ -551,7 +551,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
bool green, bool blue, bool alpha) override;
bool green, bool blue, bool alpha, ShaderConvert shader = ShaderConvert::COPY) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
PresentShader shader, float shaderTime, bool linear) override;
void DrawMultiStretchRects(