GS/HW: Support alpha in RT concurrently with Z

Also further improve double half clear detection, and handling of
horizontal page clears.

Fixes lens flare and DoF in True Crime: NYC.
Fixes reflections in Eragon.
Fixes floor in Area 51.
Fixes flickering in Transformers.
Fixes text in Katamari Damashii.
This commit is contained in:
Stenzek 2023-07-15 23:46:43 +10:00 committed by Connor McLaughlin
parent fef06a1544
commit dffeb2b5dd
20 changed files with 488 additions and 145 deletions

View File

@ -46,14 +46,6 @@ const CRC::Game CRC::m_games[] =
{0x47BA9034, SMTNocturne /* JP */}, // SMTNocturne Maniacs Chronicle
{0xD3FFC263, SMTNocturne /* KO */},
{0x84D1A8DA, SMTNocturne /* KO */},
{0xE21404E2, GetawayGames /* US */}, // Getaway
{0xE8249852, GetawayGames /* JP */}, // Getaway
{0x458485EF, GetawayGames /* EU */}, // Getaway
{0x5DFBE144, GetawayGames /* EU */}, // Getaway
{0xE78971DF, GetawayGames /* US */}, // GetawayBlackMonday
{0x342D97FA, GetawayGames /* US */}, // GetawayBlackMonday Demo
{0xE8C0AD1A, GetawayGames /* JP */}, // GetawayBlackMonday
{0x09C3DF79, GetawayGames /* EU */}, // GetawayBlackMonday
};
const CRC::Game& CRC::Lookup(u32 crc)

View File

@ -23,7 +23,6 @@ public:
enum Title : u32
{
NoTitle,
GetawayGames,
ICO,
SMTNocturne,
Tekken5,

View File

@ -485,6 +485,12 @@ u32 GSLocalMemory::GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect)
return result;
}
// Same as GetEndBlockAddress(), except that the returned value is never below bp:
// if the computed end address is lower than the start (presumably because it wrapped
// past the end of local memory), MAX_BLOCKS is added to it.
u32 GSLocalMemory::GetUnwrappedEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect)
{
	u32 end_bp = GetEndBlockAddress(bp, bw, psm, rect);
	if (end_bp < bp)
		end_bp += MAX_BLOCKS;

	return end_bp;
}
GSVector4i GSLocalMemory::GetRectForPageOffset(u32 base_bp, u32 offset_bp, u32 bw, u32 psm)
{
pxAssert((base_bp % BLOCKS_PER_PAGE) == 0 && (offset_bp % BLOCKS_PER_PAGE) == 0);

View File

@ -549,6 +549,7 @@ public:
static bool IsPageAligned(u32 psm, const GSVector4i& rc);
static u32 GetStartBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect);
static u32 GetEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect);
static u32 GetUnwrappedEndBlockAddress(u32 bp, u32 bw, u32 psm, GSVector4i rect);
static GSVector4i GetRectForPageOffset(u32 base_bp, u32 offset_bp, u32 bw, u32 psm);
// address

View File

@ -83,6 +83,16 @@ public:
{
return { x / v.x, y / v.y };
}
// Component-wise minimum of this vector and v.
GSVector2T min(const GSVector2T& v) const
{
	const auto lowest_x = std::min(x, v.x);
	const auto lowest_y = std::min(y, v.y);
	return { lowest_x, lowest_y };
}
// Component-wise maximum of this vector and v.
GSVector2T max(const GSVector2T& v) const
{
	const auto highest_x = std::max(x, v.x);
	const auto highest_y = std::max(y, v.y);
	return { highest_x, highest_y };
}
};
typedef GSVector2T<float> GSVector2;

View File

@ -42,6 +42,7 @@ const char* shaderName(ShaderConvert value)
case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_RGBA8: return "ps_convert_float32_rgba8";
case ShaderConvert::FLOAT32_TO_RGB8: return "ps_convert_float32_rgba8";
case ShaderConvert::FLOAT16_TO_RGB5A1: return "ps_convert_float16_rgb5a1";
case ShaderConvert::RGBA8_TO_FLOAT32: return "ps_convert_rgba8_float32";
case ShaderConvert::RGBA8_TO_FLOAT24: return "ps_convert_rgba8_float24";

View File

@ -37,6 +37,7 @@ enum class ShaderConvert
FLOAT32_TO_16_BITS,
FLOAT32_TO_32_BITS,
FLOAT32_TO_RGBA8,
FLOAT32_TO_RGB8,
FLOAT16_TO_RGB5A1,
RGBA8_TO_FLOAT32,
RGBA8_TO_FLOAT24,
@ -123,6 +124,17 @@ static inline bool SupportsBilinear(ShaderConvert shader)
}
}
// Returns the colour write mask to use when drawing with the given convert shader.
// FLOAT32_TO_RGB8 yields 0x7 (presumably R, G and B only, leaving alpha untouched);
// every other shader yields 0xf.
static inline u32 ShaderConvertWriteMask(ShaderConvert shader)
{
	const bool rgb_only = (shader == ShaderConvert::FLOAT32_TO_RGB8);
	return rgb_only ? 0x7 : 0xf;
}
enum class PresentShader
{
COPY = 0,

View File

@ -1207,7 +1207,8 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
{
pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader));
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), nullptr, linear);
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), nullptr,
m_convert.bs[ShaderConvertWriteMask(shader)].get(), linear);
}
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, bool linear)

View File

@ -193,6 +193,11 @@ void D3D12::GraphicsPipelineBuilder::SetBlendState(u32 rt, bool blend_enable, D3
m_desc.BlendState.IndependentBlendEnable = TRUE;
}
// Sets only the write mask of render target slot `rt` in the pipeline's blend state;
// nothing else in the blend description is modified here.
void D3D12::GraphicsPipelineBuilder::SetColorWriteMask(u32 rt, u8 write_mask /* = D3D12_COLOR_WRITE_ENABLE_ALL */)
{
	auto& target_blend = m_desc.BlendState.RenderTarget[rt];
	target_blend.RenderTargetWriteMask = write_mask;
}
void D3D12::GraphicsPipelineBuilder::SetNoBlendingState()
{
SetBlendState(0, false, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO,

View File

@ -105,6 +105,7 @@ namespace D3D12
void SetBlendState(u32 rt, bool blend_enable, D3D12_BLEND src_factor, D3D12_BLEND dst_factor, D3D12_BLEND_OP op,
D3D12_BLEND alpha_src_factor, D3D12_BLEND alpha_dst_factor, D3D12_BLEND_OP alpha_op,
u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL);
void SetColorWriteMask(u32 rt, u8 write_mask = D3D12_COLOR_WRITE_ENABLE_ALL);
void SetNoBlendingState();

View File

@ -1341,7 +1341,8 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
int(dRect.right - dRect.left), int(dRect.bottom - dRect.top));
DoStretchRect(static_cast<GSTexture12*>(sTex), sRect, static_cast<GSTexture12*>(dTex), dRect,
dTex ? m_convert[static_cast<int>(shader)].get() : m_present[static_cast<int>(shader)].get(), linear, true);
dTex ? m_convert[static_cast<int>(shader)].get() : m_present[static_cast<int>(shader)].get(), linear,
ShaderConvertWriteMask(shader) == 0xf);
}
void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
@ -2376,6 +2377,8 @@ bool GSDevice12::CompileConvertPipelines()
gpb.SetNoStencilState();
}
gpb.SetColorWriteMask(0, ShaderConvertWriteMask(i));
ComPtr<ID3DBlob> ps(GetUtilityPixelShader(*shader, shaderName(i)));
if (!ps)
return false;

View File

@ -920,6 +920,18 @@ GSVector4i GSRendererHW::GetSplitTextureShuffleDrawRect() const
return r.insert64<0>(0).ralign<Align_Outside>(frame_psm.pgs);
}
// Computes a 32-bit frame buffer mask for the current texture shuffle draw.
// The frame's FBMSK is first reduced to a 16-bit mask, which is then split into two bytes.
// The result has 0xFF000000 set when the upper byte is fully masked, and additionally
// 0x00FFFFFF when the lower byte is fully masked as well. Must only be called during a shuffle.
u32 GSRendererHW::GetEffectiveTextureShuffleFbmsk() const
{
	pxAssert(m_texture_shuffle);

	// Ignore mask bits which don't exist in the frame's format.
	const u32 masked = m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;

	// Squash the 32-bit mask down to 16 bits: top five bits of each colour byte, top bit of the last byte.
	const u32 bits_0_4 = (masked >> 3) & 0x1F;
	const u32 bits_5_9 = (masked >> 6) & 0x3E0;
	const u32 bits_10_14 = (masked >> 9) & 0x7C00;
	const u32 bit_15 = (masked >> 16) & 0x8000;
	const u32 mask16 = bits_0_4 | bits_5_9 | bits_10_14 | bit_15;

	const bool low_byte_masked = ((mask16 & 0xFF) == 0xFF);
	const bool high_byte_masked = (((mask16 >> 8) & 0xFF) == 0xFF);

	u32 effective = 0;
	if (high_byte_masked)
	{
		effective |= 0xFF000000u;
		if (low_byte_masked)
			effective |= 0x00FFFFFFu;
	}

	return effective;
}
GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages)
{
const GSVector2i& pgs = GSLocalMemory::m_psm[psm].pgs;
@ -1038,8 +1050,8 @@ bool GSRendererHW::IsStartingSplitClear()
if (IsDiscardingDstColor())
{
const u32 bp = m_cached_ctx.FRAME.Block();
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp, m_cached_ctx.FRAME.PSM);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp, m_cached_ctx.FRAME.PSM);
}
return true;
@ -1059,8 +1071,8 @@ bool GSRendererHW::ContinueSplitClear()
if (IsDiscardingDstColor())
{
const u32 bp = m_cached_ctx.FRAME.Block();
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, bp, m_cached_ctx.FRAME.PSM);
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, bp, m_cached_ctx.FRAME.PSM);
}
// Check next draw.
@ -1954,22 +1966,22 @@ void GSRendererHW::Draw()
// If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth.
if (is_zero_clear || height_invalid)
{
const u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const u32 ds_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r);
// If this is a partial clear of a larger buffer, we can't invalidate the target, since we'll be losing data
// which only existed on the GPU. Assume a BW change is a new target, though. Test case: Persona 3 shadows.
GSTextureCache::Target* tgt;
const bool overwriting_whole_rt =
(no_rt || height_invalid ||
(tgt = g_texture_cache->GetExactTarget(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW,
GSTextureCache::RenderTarget,
GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW,
m_cached_ctx.FRAME.PSM, m_r))) == nullptr ||
GSTextureCache::RenderTarget, rt_end_bp)) == nullptr ||
m_r.rintersect(tgt->m_valid).eq(tgt->m_valid));
const bool overwriting_whole_ds =
(no_ds || height_invalid ||
(tgt = g_texture_cache->GetExactTarget(m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW,
GSTextureCache::DepthStencil,
GSLocalMemory::GetEndBlockAddress(m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW,
m_cached_ctx.ZBUF.PSM, m_r))) == nullptr ||
GSTextureCache::DepthStencil, ds_end_bp)) == nullptr ||
m_r.rintersect(tgt->m_valid).eq(tgt->m_valid));
if (overwriting_whole_rt && overwriting_whole_ds && TryGSMemClear())
@ -1979,16 +1991,20 @@ void GSRendererHW::Draw()
if (!no_rt)
{
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, m_cached_ctx.FRAME.Block());
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, m_cached_ctx.FRAME.Block());
g_texture_cache->InvalidateVideoMem(m_context->offset.fb, m_r, true);
g_texture_cache->InvalidateVideoMem(m_context->offset.fb, m_r, false);
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM);
}
if (!no_ds && m_cached_ctx.ZBUF.ZMSK == 0)
{
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block());
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, m_cached_ctx.ZBUF.Block());
g_texture_cache->InvalidateVideoMem(m_context->offset.zb, m_r, true);
g_texture_cache->InvalidateVideoMem(m_context->offset.zb, m_r, false);
g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r),
ds_end_bp, m_cached_ctx.ZBUF.PSM);
}
cleanup_draw();
@ -2397,15 +2413,10 @@ void GSRendererHW::Draw()
rt->m_TEX0 = FRAME_TEX0;
rt->m_TEX0.TBW = std::max(width, FRAME_TEX0.TBW);
}
rt->UpdateValidAlpha(FRAME_TEX0.PSM, fm);
}
if (ds)
{
ds->m_TEX0 = ZBUF_TEX0;
ds->UpdateValidAlpha(ZBUF_TEX0.PSM, zm);
}
}
else if (!m_texture_shuffle)
{
@ -2415,16 +2426,20 @@ void GSRendererHW::Draw()
{
rt->m_TEX0.TBW = std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW);
rt->m_TEX0.PSM = FRAME_TEX0.PSM;
rt->UpdateValidAlpha(FRAME_TEX0.PSM, fm);
}
if (ds)
{
ds->m_TEX0.TBW = std::max(ds->m_TEX0.TBW, ZBUF_TEX0.TBW);
ds->m_TEX0.PSM = ZBUF_TEX0.PSM;
ds->UpdateValidAlpha(ZBUF_TEX0.PSM, zm);
}
}
// Figure out which channels we're writing.
if (rt)
rt->UpdateValidChannels(rt->m_TEX0.PSM, m_texture_shuffle ? GetEffectiveTextureShuffleFbmsk() : fm);
if (ds)
ds->UpdateValidChannels(ZBUF_TEX0.PSM, zm);
if (rt)
rt->Update();
if (ds)
@ -2656,21 +2671,17 @@ void GSRendererHW::Draw()
if (old_ds)
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, old_ds->m_TEX0.TBP0);
// Make sure they aren't the same source (double half clear), we don't want to invalidate the texture.
const bool can_invalidate = (m_cached_ctx.FRAME.Block() != m_cached_ctx.ZBUF.Block()) || !rt || !ds;
if ((fm & fm_mask) != fm_mask && rt)
{
//rt->m_valid = rt->m_valid.runion(r);
// Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
rt->UpdateValidBits(~fm & fm_mask);
g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false);
if (can_invalidate)
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, m_cached_ctx.FRAME.Block());
// Remove overwritten Zs at the FBP.
g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, m_cached_ctx.FRAME.Block(),
m_cached_ctx.FRAME.PSM, m_texture_shuffle ? GetEffectiveTextureShuffleFbmsk() : fm);
}
if (zm != 0xffffffff && ds)
@ -2679,12 +2690,11 @@ void GSRendererHW::Draw()
// Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
ds->UpdateValidBits(GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].fmsk);
g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false);
if (can_invalidate)
g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block());
// Remove overwritten RTs at the ZBP.
g_texture_cache->InvalidateVideoMemType(
GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm);
}
//
@ -3060,7 +3070,10 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
// Set dirty alpha on target, but only if we're actually writing to it.
if (rt)
rt->m_valid_alpha |= m_conf.colormask.wa;
{
rt->m_valid_alpha_low |= m_conf.colormask.wa;
rt->m_valid_alpha_high |= m_conf.colormask.wa;
}
// Once we draw the shuffle, no more buffering.
m_split_texture_shuffle_pages = 0;
@ -5512,24 +5525,35 @@ bool GSRendererHW::DetectDoubleHalfClear(bool& no_rt, bool& no_ds)
}
// Are we clearing over the middle of this target?
if (!tgt || (((half + written_pages) * BLOCKS_PER_PAGE) - 1) > tgt->m_end_block)
if (!tgt || (((half + written_pages) * BLOCKS_PER_PAGE) - 1) >
GSLocalMemory::GetUnwrappedEndBlockAddress(
tgt->m_TEX0.TBP0, tgt->m_TEX0.TBW, tgt->m_TEX0.PSM, tgt->GetUnscaledRect()))
{
return false;
}
// Siren double half clears horizontally with half FBW instead of vertically.
// We could use the FBW here, but using the rectangle seems a bit safer, because changing FBW
// from one RT to another isn't uncommon.
horizontal = (m_r.w >= tgt->m_valid.w);
const GSVector4 vr = GSVector4(m_r.rintersect(tgt->m_valid)) / GSVector4(tgt->m_valid);
horizontal = (vr.z < vr.w);
}
GL_INS("DetectDoubleHalfClear(): Clearing %s, fbp=%x, zbp=%x, pages=%u, base=%x, half=%x, rect=(%d,%d=>%d,%d)",
clear_depth ? "depth" : "color", m_cached_ctx.FRAME.Block(), m_cached_ctx.ZBUF.Block(), written_pages,
base * BLOCKS_PER_PAGE, half * BLOCKS_PER_PAGE, m_r.x, m_r.y, m_r.z, m_r.w);
GL_INS("DetectDoubleHalfClear(): Clearing %s %s, fbp=%x, zbp=%x, pages=%u, base=%x, half=%x, rect=(%d,%d=>%d,%d)",
clear_depth ? "depth" : "color", horizontal ? "horizontally" : "vertically", m_cached_ctx.FRAME.Block(),
m_cached_ctx.ZBUF.Block(), written_pages, base * BLOCKS_PER_PAGE, half * BLOCKS_PER_PAGE, m_r.x, m_r.y, m_r.z,
m_r.w);
// Double the clear rect.
if (horizontal)
{
m_cached_ctx.FRAME.FBW *= 2;
m_r.z += m_r.x + m_r.width();
}
else
{
m_r.w += m_r.y + m_r.height();
}
ReplaceVerticesWithSprite(m_r, GSVector2i(1, 1));
// Prevent wasting time looking up and creating the target which is getting blown away.

View File

@ -104,6 +104,7 @@ private:
bool IsPossibleChannelShuffle() const;
bool IsSplitTextureShuffle();
GSVector4i GetSplitTextureShuffleDrawRect() const;
u32 GetEffectiveTextureShuffleFbmsk() const;
static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages);
bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw);

View File

@ -95,12 +95,10 @@ void GSTextureCache::RemoveAll()
m_surface_offset_cache.clear();
}
bool GSTextureCache::FullRectDirty(Target* target)
bool GSTextureCache::FullRectDirty(Target* target, u32 rgba_mask)
{
RGBAMask rgba;
rgba._u32 = GSUtil::GetChannelMask(target->m_TEX0.PSM);
// One complete dirty rect, not pieces (Add dirty rect function should be able to join these all together).
if (target->m_age != 0 && target->m_dirty.size() == 1 && rgba._u32 == target->m_dirty[0].rgba._u32 && target->m_valid.rintersect(target->m_dirty[0].r).eq(target->m_valid))
if (target->m_dirty.size() == 1 && (rgba_mask & target->m_dirty[0].rgba._u32) == rgba_mask && target->m_valid.rintersect(target->m_dirty[0].r).eq(target->m_valid))
{
return true;
}
@ -108,6 +106,15 @@ bool GSTextureCache::FullRectDirty(Target* target)
return false;
}
// Convenience overload: checks for a full dirty rect covering every channel of the target's
// own format. Targets with m_age == 0 are never reported as fully dirty; the original code
// does not document the reason for this exclusion.
bool GSTextureCache::FullRectDirty(Target* target)
{
	const bool has_aged = (target->m_age != 0);
	return has_aged && FullRectDirty(target, GSUtil::GetChannelMask(target->m_TEX0.PSM));
}
void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw, RGBAMask rgba, bool req_linear)
{
bool skipdirty = false;
@ -170,7 +177,6 @@ void GSTextureCache::ResizeTarget(Target* t, GSVector4i rect, u32 tbp, u32 psm,
AddDirtyRectTarget(t, GSVector4i(t->m_valid.z, t->m_valid.y, t->m_valid.z + std::max(0, size_delta.x), t->m_valid.w), t->m_TEX0.PSM, t->m_TEX0.TBW, rgba);
const GSVector4i valid_rect = { t->m_valid.x, t->m_valid.y, t->m_valid.z + std::max(0, size_delta.x), t->m_valid.w + std::max(0, size_delta.y) };
t->UpdateValidity(valid_rect);
t->UpdateValidBits(GSLocalMemory::m_psm[t->m_TEX0.PSM].fmsk);
GetTargetSize(tbp, tbw, psm, t->m_valid.z, t->m_valid.w);
const int new_w = std::max(t->m_unscaled_size.x, t->m_valid.z);
const int new_h = std::max(t->m_unscaled_size.y, t->m_valid.w);
@ -640,7 +646,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
{
const GSVector2i page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs;
const bool can_translate = CanTranslate(bp, TEX0.TBW, psm, r, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW);
const bool swizzle_match = GSLocalMemory::m_psm[psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
const bool swizzle_match = psm_s.depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
if (can_translate)
{
@ -651,9 +657,9 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
else
{
// If it's not page aligned, grab the whole pages it covers, to be safe.
if (GSLocalMemory::m_psm[psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp)
if (psm_s.bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp)
{
const GSVector2i dst_page_size = GSLocalMemory::m_psm[psm].pgs;
const GSVector2i dst_page_size = psm_s.pgs;
target_rc = GSVector4i(target_rc.x / page_size.x, target_rc.y / page_size.y,
(target_rc.z + (page_size.x - 1)) / page_size.x,
(target_rc.w + (page_size.y - 1)) / page_size.y);
@ -679,6 +685,27 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
}
}
if (dst && psm_s.trbpp != 24 && !dst->HasValidAlpha())
{
for (Target* t : m_dst[RenderTarget])
{
if (t->m_age <= 1 && t->m_TEX0.TBP0 == bp && t->HasValidAlpha())
{
GL_CACHE("TC depth: Using RT %x instead of depth because of missing alpha", t->m_TEX0.TBP0);
// Have to update here, because this is a source, it won't Update().
if (FullRectDirty(t, 0x7))
t->Update();
else if (!t->m_valid_rgb)
CopyRGBFromDepthToColor(t, dst);
dst = t;
inside_target = false;
break;
}
}
}
if (!dst)
{
// Retry on the render target (Silent Hill 4)
@ -810,6 +837,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
{
if (t->m_used)
{
// Skip over targets that we're only keeping around for the alpha, when the RGB is now being used for depth.
if (!t->HasValidBitsForFormat(psm))
continue;
// Typical bug (MGS3 blue cloud):
// 1/ RT used as 32 bits => alpha channel written
// 2/ RT used as 24 bits => no update of alpha channel
@ -818,7 +849,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
//
// Solution: consider the RT as 32 bits if the alpha was used in the past
// We can render to the target as C32, but mask alpha, in which case, pretend like it doesn't have any.
const u32 t_psm = t->m_valid_alpha ? t->m_TEX0.PSM & ~0x1 : ((t->m_TEX0.PSM == PSMCT32) ? PSMCT24 : t->m_TEX0.PSM);
const u32 t_psm = t->HasValidAlpha() ? t->m_TEX0.PSM & ~0x1 : ((t->m_TEX0.PSM == PSMCT32) ? PSMCT24 : t->m_TEX0.PSM);
bool rect_clean = GSUtil::HasSameSwizzleBits(psm, t_psm);
const bool width_match = (std::max(64U, bw * 64U) >> GSLocalMemory::m_psm[psm].info.pageShiftX()) ==
(std::max(64U, t->m_TEX0.TBW * 64U) >> GSLocalMemory::m_psm[t->m_TEX0.PSM].info.pageShiftX());
@ -1205,6 +1236,12 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block,
return nullptr;
}
// Multiplies an unscaled size by `scale`, rounding each dimension up to the next integer.
GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float scale)
{
	const float scaled_width = std::ceil(static_cast<float>(sz.x) * scale);
	const float scaled_height = std::ceil(static_cast<float>(sz.y) * scale);
	return GSVector2i(static_cast<int>(scaled_width), static_cast<int>(scaled_height));
}
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect)
{
@ -1219,10 +1256,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{
// TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one.
clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y);
new_size.x = std::max(size.x, tgt->m_unscaled_size.x);
new_size.y = std::max(size.y, tgt->m_unscaled_size.y);
new_scaled_size.x = static_cast<int>(std::ceil(static_cast<float>(new_size.x) * scale));
new_scaled_size.y = static_cast<int>(std::ceil(static_cast<float>(new_size.y) * scale));
new_size = size.max(tgt->m_unscaled_size);
new_scaled_size = ScaleRenderTargetSize(new_size, scale);
dRect = (GSVector4(GSVector4i::loadh(tgt->m_unscaled_size)) * GSVector4(scale)).ceil();
GL_INS("TC Rescale: %dx%d: %dx%d @ %f -> %dx%d @ %f", tgt->m_unscaled_size.x, tgt->m_unscaled_size.y,
tgt->m_texture->GetWidth(), tgt->m_texture->GetHeight(), tgt->m_scale, new_scaled_size.x, new_scaled_size.y,
@ -1347,6 +1382,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
calcRescale(dst);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) :
g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear);
if (!tex)
return nullptr;
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
m_target_memory_usage = (m_target_memory_usage - dst->m_texture->GetMemUsage()) + tex->GetMemUsage();
@ -1362,6 +1400,51 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_unscaled_size = new_size;
}
// If our RGB was invalidated, we need to pull it from depth.
if (type == RenderTarget && (preserve_target || !dst->m_valid.rintersect(draw_rect).eq(dst->m_valid)) &&
!dst->m_valid_rgb && !FullRectDirty(dst, 0x7) &&
(GSLocalMemory::m_psm[TEX0.PSM].trbpp < 24 || fbmask != 0x00FFFFFFu))
{
GL_CACHE("TC: Attempt to repopulate RGB for %s[%x]", to_string(type), dst->m_TEX0.TBP0);
for (Target* dst_match : m_dst[DepthStencil])
{
if (dst_match->m_TEX0.TBP0 != TEX0.TBP0 || !dst_match->m_valid_rgb)
continue;
if (!CopyRGBFromDepthToColor(dst, dst_match))
{
// Needed new texture and memory allocation failed.
return nullptr;
}
break;
}
if (!dst->m_valid_rgb)
{
GL_CACHE("TC: Cannot find RGB target for %s[%x], clearing.", to_string(type), dst->m_TEX0.TBP0);
// We couldn't get RGB from any depth targets. So clear and preload.
// Unfortunately, we still have an alpha channel to preserve, and we can't clear RGB...
// So, create a new target, clear/preload it, and copy RGB in.
GSTexture* tex = (type == RenderTarget) ?
g_gs_device->CreateRenderTarget(dst->m_texture->GetWidth(),
dst->m_texture->GetHeight(), GSTexture::Format::Color, true) :
g_gs_device->CreateDepthStencil(dst->m_texture->GetWidth(),
dst->m_texture->GetHeight(), GSTexture::Format::DepthStencil, true);
if (!tex)
return nullptr;
std::swap(dst->m_texture, tex);
PreloadTarget(TEX0, size, GSVector2i(dst->m_valid.z, dst->m_valid.w), is_frame, preload,
preserve_target, draw_rect, dst);
g_gs_device->StretchRect(tex, GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f), dst->m_texture,
GSVector4(dst->m_texture->GetRect()), false, false, false, true);
g_gs_device->Recycle(tex);
dst->m_valid_rgb = true;
}
}
// Drop dirty rect if we're overwriting the whole target.
if (!preserve_target && draw_rect.rintersect(dst->m_valid).eq(dst->m_valid))
{
@ -1384,19 +1467,20 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
// Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick
// some bad data.
Target* dst_match = nullptr;
for (auto t : m_dst[rev_type])
for (Target* t : m_dst[rev_type])
{
if (bp == t->m_TEX0.TBP0)
// Don't pull in targets without valid lower 24 bits, it makes no sense to convert them.
if (bp != t->m_TEX0.TBP0 || !t->m_valid_rgb)
continue;
if (t->m_age == 0)
{
if (t->m_age == 0)
{
dst_match = t;
break;
}
else if (t->m_age == 1)
{
dst_match = t;
}
dst_match = t;
break;
}
else if (t->m_age == 1)
{
dst_match = t;
}
}
@ -1411,6 +1495,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->OffsetHack_modxy = dst_match->OffsetHack_modxy;
dst->m_end_block = dst_match->m_end_block; // If we're copying the size, we need to keep the end block.
dst->m_valid = dst_match->m_valid;
dst->m_valid_alpha_low = dst_match->m_valid_alpha_low && psm_s.trbpp != 24;
dst->m_valid_alpha_high = dst_match->m_valid_alpha_high && psm_s.trbpp != 24;
dst->m_valid_rgb = dst_match->m_valid_rgb;
ShaderConvert shader;
// m_32_bits_fmt gets set on a shuffle or if the format isn't 16bit.
@ -1483,7 +1570,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_used |= used;
if (is_frame)
dst->m_valid_alpha = false;
{
// TODO: Why are we doing this?!
dst->m_valid_alpha_low = false;
dst->m_valid_alpha_high = false;
}
dst->readbacks_since_draw = 0;
@ -1496,8 +1587,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
if (type == DepthStencil)
{
GL_CACHE("TC: Lookup Target(Depth) %dx%d, miss (0x%x, TBW %d, %s)", size.x, size.y, TEX0.TBP0,
@ -1510,6 +1599,32 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
}
Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true);
PreloadTarget(TEX0, size, valid_size, is_frame, preload, preserve_target, draw_rect, dst);;
dst->m_is_frame = is_frame;
dst->m_used |= used;
// Not *strictly* correct if RGB is masked, but we won't use it as a texture if not..
dst->m_valid_rgb = true;
if (is_frame)
{
// TODO: Why are we doing this?!
dst->m_valid_alpha_low = false;
dst->m_valid_alpha_high = false;
}
dst->readbacks_since_draw = 0;
assert(dst && dst->m_texture && dst->m_scale == scale);
return dst;
}
void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst)
{
// In theory new textures contain invalidated data. Still in theory a new target
// must contains the content of the GS memory.
// In practice, TC will wrongly invalidate some RT. For example due to write on the alpha
@ -1521,15 +1636,14 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
//
// From a performance point of view, it might cost a little on big upscaling
// but normally few RT are miss so it must remain reasonable.
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const bool supported_fmt = !GSConfig.UserHacks_DisableDepthSupport || psm_s.depth == 0;
if (TEX0.TBW > 0 && supported_fmt)
{
const bool forced_preload = GSRendererHW::GetInstance()->m_force_preload > 0;
const GSVector4i newrect = GSVector4i::loadh(size);
u32 rect_end = GSLocalMemory::GetEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect);
if (rect_end < TEX0.TBP0)
rect_end += MAX_BLOCKS;
const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect);
RGBAMask rgba;
rgba._u32 = GSUtil::GetChannelMask(TEX0.PSM);
@ -1632,19 +1746,9 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
}
}
else
{
dst->UpdateValidity(GSVector4i::loadh(valid_size));
dst->m_is_frame = is_frame;
dst->m_used |= used;
if (is_frame)
dst->m_valid_alpha = false;
dst->readbacks_since_draw = 0;
assert(dst && dst->m_texture && dst->m_scale == scale);
return dst;
}
}
GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale)
@ -1779,6 +1883,81 @@ u32 GSTextureCache::ConvertDepthToColor(float d, ShaderConvert convert)
}
}
// Fills the RGB channels of the colour target `dst` from the depth target `depth_src`,
// using the FLOAT32_TO_RGB8 convert shader, and marks dst's RGB as valid.
//
// If depth_src is larger than dst in either dimension, dst is moved to a new, larger texture
// first (any valid alpha is copied across). Dirty rectangles on dst which intersect the area
// covered by depth_src have their RGB bits dropped, and are removed entirely once no channel
// bits remain.
//
// Returns false only when a larger texture was required and could not be allocated; in that
// case dst is left untouched. Returns true otherwise.
bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src)
{
	GL_CACHE("TC: Copy RGB from %dx%d %s[%x, %s] to %dx%d %s[%x, %s]", depth_src->GetUnscaledWidth(),
		depth_src->GetUnscaledHeight(), to_string(depth_src->m_type), depth_src->m_TEX0.TBP0,
		psm_str(depth_src->m_TEX0.PSM), dst->GetUnscaledWidth(), dst->GetUnscaledHeight(), to_string(dst->m_type),
		dst->m_TEX0.TBP0, psm_str(dst->m_TEX0.PSM));

	// The depth target might be larger (Driv3r).
	const GSVector2i new_size = depth_src->GetUnscaledSize().max(dst->GetUnscaledSize());
	const GSVector2i new_scaled_size = ScaleRenderTargetSize(new_size, dst->GetScale());
	const bool needs_new_tex = (new_size != dst->m_unscaled_size);
	GSTexture* tex = dst->m_texture;
	if (needs_new_tex)
	{
		tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color,
			new_size != dst->m_unscaled_size || new_size != depth_src->m_unscaled_size);
		if (!tex)
			return false;
	}

	// Remove any dirty rectangles contained by this update, we don't want to pull from local memory.
	// The index only advances when nothing was erased: erase() shifts the following element into
	// slot i, so incrementing after an erase (or twice per iteration) would skip rectangles.
	const GSVector4i clear_dirty_rc = GSVector4i::loadh(depth_src->GetUnscaledSize());
	for (u32 i = 0; i < dst->m_dirty.size();)
	{
		GSDirtyRect& dr = dst->m_dirty[i];
		const GSVector4i drc = dr.GetDirtyRect(dst->m_TEX0);
		if (!drc.rintersect(clear_dirty_rc).rempty())
		{
			// RGB is about to be replaced, so only keep the rect if other channels are still dirty.
			if ((dr.rgba._u32 &= ~0x7) == 0)
			{
				GL_CACHE("TC: Remove dirty rect (%d,%d=>%d,%d) from %s[%x, %s] due to incoming depth.", drc.left,
					drc.top, drc.right, drc.bottom, to_string(dst->m_type), dst->m_TEX0.TBP0, psm_str(dst->m_TEX0.PSM));
				dst->m_dirty.erase(dst->m_dirty.begin() + i);
				continue;
			}
		}

		i++;
	}

	// Depth source should be up to date.
	depth_src->Update();

	constexpr ShaderConvert shader = ShaderConvert::FLOAT32_TO_RGB8;
	if (depth_src->m_texture->GetState() == GSTexture::State::Cleared)
	{
		// No need to draw, just clear the colour target to the converted depth clear value.
		g_gs_device->ClearRenderTarget(tex, ConvertDepthToColor(depth_src->m_texture->GetClearDepth(), shader));
	}
	else if (depth_src->m_texture->GetState() != GSTexture::State::Invalidated)
	{
		const GSVector4 convert_rect = GSVector4(depth_src->GetUnscaledRect().rintersect(GSVector4i::loadh(new_size)));
		g_gs_device->StretchRect(depth_src->m_texture, convert_rect / GSVector4(depth_src->GetUnscaledSize()).xyxy(),
			tex, convert_rect * GSVector4(dst->GetScale()), shader, false);
		g_perfmon.Put(GSPerfMon::TextureCopies, 1);
	}

	// Copy in alpha if we're a new texture.
	if (needs_new_tex)
	{
		if (dst->m_valid_alpha_low || dst->m_valid_alpha_high)
		{
			g_gs_device->StretchRect(dst->m_texture, GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f), tex,
				GSVector4(GSVector4i::loadh(dst->m_unscaled_size)), false, false, false, true);
			g_perfmon.Put(GSPerfMon::TextureCopies, 1);
		}

		g_gs_device->Recycle(dst->m_texture);
		dst->m_texture = tex;
	}

	dst->m_unscaled_size = new_size;
	dst->m_valid_rgb = true;
	return true;
}
bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex)
{
GSDownloadTexture* ctex = tex->get();
@ -1799,36 +1978,87 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo
return true;
}
/// Invalidates every target in both target lists that either starts at start_bp or lies entirely
/// within the block range [start_bp, end_bp]. RGB validity is always cleared; alpha validity is
/// kept only when write_psm is a 24-bit format. Targets left with no valid channels, and every
/// affected depth target, are removed and deleted.
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm)
{
	const bool keep_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24);

	for (int type = 0; type < 2; type++)
	{
		auto& targets = m_dst[type];
		auto it = targets.begin();
		while (it != targets.end())
		{
			Target* const t = *it;

			// Affected: same start address, or wholly inside the written range.
			const bool affected = (start_bp == t->m_TEX0.TBP0) ||
				(t->m_TEX0.TBP0 >= start_bp && t->UnwrappedEndBlock() <= end_bp);
			if (!affected)
			{
				++it;
				continue;
			}

			InvalidateSourcesFromTarget(t);

			if (!keep_alpha)
			{
				t->m_valid_alpha_low = false;
				t->m_valid_alpha_high = false;
			}
			t->m_valid_rgb = false;

			// Don't keep partial depth buffers around.
			const bool any_valid = (t->m_valid_alpha_low || t->m_valid_alpha_high || t->m_valid_rgb);
			if (any_valid && type != DepthStencil)
			{
				GL_CACHE("TC: InvalidateContainedTargets: Clear RGB valid on %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM));
				++it;
				continue;
			}

			GL_CACHE("TC: InvalidateContainedTargets: Remove Target %s[%x, %s]", to_string(type), t->m_TEX0.TBP0, psm_str(t->m_TEX0.PSM));
			it = targets.erase(it);
			delete t;
		}
	}
}
// Goal: Depth And Target at the same address is not possible. On GS it is
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
// must invalidate the Target/Depth respectively
// `type` selects the target list (m_dst[type]) that is searched, and `bp` is matched against each
// target's base pointer (m_TEX0.TBP0). Only the first matching target is processed; the loop
// breaks after handling it. `write_psm` and `write_fbmsk` decide which of that target's
// valid-channel flags survive.
void GSTextureCache::InvalidateVideoMemType(int type, u32 bp)
void GSTextureCache::InvalidateVideoMemType(int type, u32 bp, u32 write_psm, u32 write_fbmsk)
{
if (GSConfig.UserHacks_DisableDepthSupport)
return;
// The Getaway games need this function disabled for player shadows to work correctly.
if (g_gs_renderer->m_game.title == CRC::GetawayGames)
return;
auto& list = m_dst[type];
for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
Target* const t = *i;
if (bp != t->m_TEX0.TBP0)
continue;
if (bp == t->m_TEX0.TBP0)
// A channel group stays valid only if it was valid before AND the write format is 24-bit, or it is
// 32-bit with every bit of that group set in write_fbmsk (low alpha nibble 0x0F000000, high alpha
// nibble 0xF0000000). RGB stays valid only for formats of 24 bits or more with all of 0x00FFFFFF
// set in write_fbmsk.
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[write_psm];
const bool new_valid_alpha_low = t->m_valid_alpha_low && (psm_s.trbpp == 24 || (psm_s.trbpp == 32 && (write_fbmsk & 0x0F000000) == 0x0F000000));
const bool new_valid_alpha_high = t->m_valid_alpha_high && (psm_s.trbpp == 24 || (psm_s.trbpp == 32 && (write_fbmsk & 0xF0000000) == 0xF0000000));
const bool new_valid_rgb = t->m_valid_rgb && (psm_s.trbpp >= 24 && (write_fbmsk & 0x00FFFFFF) == 0x00FFFFFF);
// Don't bother trying to keep partial depth buffers around.
// The target is kept when at least one group stays valid and it is either not a depth target, or
// it is a 24-bit depth target whose RGB is still valid.
if ((new_valid_alpha_low || new_valid_alpha_high || new_valid_rgb) &&
(type != DepthStencil || (GSLocalMemory::m_psm[t->m_TEX0.PSM].trbpp == 24 && new_valid_rgb)))
{
GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) (0x%x)", to_string(type),
t->m_TEX0.TBP0);
// Need to also remove any sources which reference this target.
InvalidateSourcesFromTarget(t);
list.erase(i);
delete t;
if (t->m_valid_alpha_low != new_valid_alpha_low || t->m_valid_alpha_high != new_valid_alpha_high || t->m_valid_rgb != new_valid_rgb)
{
GL_CACHE("TC: InvalidateVideoMemType: Partial Remove Target(%s) (0x%x) RGB: %s->%s, Alow: %s->%s Ahigh: %s->%s",
to_string(type), t->m_TEX0.TBP0, t->m_valid_rgb ? "valid" : "invalid", new_valid_rgb ? "valid" : "invalid",
t->m_valid_alpha_low ? "valid" : "invalid", new_valid_alpha_low ? "valid" : "invalid",
t->m_valid_alpha_high ? "valid" : "invalid", new_valid_alpha_high ? "valid" : "invalid");
}
t->m_valid_alpha_low = new_valid_alpha_low;
t->m_valid_alpha_high = new_valid_alpha_high;
t->m_valid_rgb = new_valid_rgb;
break;
}
GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) (0x%x)", to_string(type),
t->m_TEX0.TBP0);
// Need to also remove any sources which reference this target.
InvalidateSourcesFromTarget(t);
list.erase(i);
delete t;
break;
}
}
@ -2157,7 +2387,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
if (rgba._u32 == 0x8 && t->m_TEX0.PSM == PSMCT32)
{
t->m_TEX0.PSM = PSMCT24;
t->m_valid_alpha = false;
t->m_valid_alpha_low = false;
t->m_valid_alpha_high = false;
++i;
}
else
@ -2181,7 +2412,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
// the texture cache. Otherwise it will generate a wrong
// hit on the texture cache.
// Game: Conflict - Desert Storm (flickering)
t->m_valid_alpha = false;
t->m_valid_alpha_low = false;
t->m_valid_alpha_high = false;
}
++i;
@ -2886,7 +3118,9 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// You'd think we'd update to use the source valid bits, but it's not, because it might be copying some data which was uploaded and dirtied the target.
// An example of this is Cross Channel - To All People where it renders a picture with 0x7f000000 FBMSK at 0x1180, which was all cleared to black on boot,
// Then it moves it to 0x2e80, where some garbage has been loaded underneath, so we can't assume that's the only valid data.
dst->UpdateValidBits(GSLocalMemory::m_psm[DPSM].fmsk);
dst->m_valid_rgb |= src->m_valid_rgb;
dst->m_valid_alpha_low |= src->m_valid_alpha_low;
dst->m_valid_alpha_high |= src->m_valid_alpha_high;
dst->UpdateValidity(GSVector4i(dx, dy, dx + w, dy + h));
dst->UpdateDrawn(GSVector4i(dx, dy, dx + w, dy + h));
dst->m_alpha_max = src->m_alpha_max;
@ -3079,7 +3313,7 @@ GSTextureCache::Target* GSTextureCache::GetTargetWithSharedBits(u32 BP, u32 PSM)
for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU.
{
Target* t = *it;
const u32 t_psm = (t->m_valid_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
const u32 t_psm = (t->HasValidAlpha()) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
if (GSUtil::HasSharedBits(BP, PSM, t->m_TEX0.TBP0, t_psm))
return t;
}
@ -4333,9 +4567,8 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
// Don't overwrite bits which aren't used in the target's format.
// Stops Burnout 3's sky from breaking when flushing targets to local memory.
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const u32 write_mask = t->m_valid_bits & psm.fmsk;
if (psm.bpp > 16 && write_mask == 0)
const u32 write_mask = (t->m_valid_rgb ? 0x00FFFFFFu : 0) | (t->m_valid_alpha_low ? 0x0F000000u : 0) | (t->m_valid_alpha_high ? 0xF0000000u : 0);
if (write_mask == 0)
{
DbgCon.Warning("Not reading back target %x PSM %s due to no write mask", TEX0.TBP0, psm_str(TEX0.PSM));
return;
@ -4947,10 +5180,34 @@ void GSTextureCache::Target::UpdateIfDirtyIntersects(const GSVector4i& rc)
}
}
// Sets the valid-channel flags from a format and frame buffer mask. Flags are only ever set here
// (each assignment is an OR), never cleared. A 16-bit format (trbpp == 16) sets every flag.
// Otherwise a flag is set when at least one bit of its group is clear in fbmsk: the alpha groups
// need a 32-bit format, the RGB group needs 24 bits or more.
void GSTextureCache::Target::UpdateValidAlpha(u32 psm, u32 fbmsk)
void GSTextureCache::Target::UpdateValidChannels(u32 psm, u32 fbmsk)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[psm];
m_valid_alpha |= (psm_s.trbpp == 32 && (fbmsk & 0xFF000000) != 0xFF000000) || (psm_s.trbpp == 16);
// Low alpha nibble: bits 24-27.
m_valid_alpha_low |= (psm_s.trbpp == 32 && (fbmsk & 0x0F000000) != 0x0F000000) || (psm_s.trbpp == 16);
// High alpha nibble: bits 28-31.
m_valid_alpha_high |= (psm_s.trbpp == 32 && (fbmsk & 0xF0000000) != 0xF0000000) || (psm_s.trbpp == 16);
// Colour: bits 0-23.
m_valid_rgb |= (psm_s.trbpp >= 24 && (fbmsk & 0x00FFFFFF) != 0x00FFFFFF) || (psm_s.trbpp == 16);
}
/// Returns whether every valid-channel flag required for format `psm` is set on this target.
bool GSTextureCache::Target::HasValidBitsForFormat(u32 psm) const
{
	// PSMT4 needs RGB and both alpha halves.
	if (psm == PSMT4)
		return (m_valid_rgb && m_valid_alpha_low && m_valid_alpha_high);

	// PSMT8H needs both alpha halves.
	if (psm == PSMT8H)
		return (m_valid_alpha_low && m_valid_alpha_high);

	// The 4-bit high formats each need a single alpha half.
	if (psm == PSMT4HL)
		return (m_valid_alpha_low);
	if (psm == PSMT4HH)
		return (m_valid_alpha_high);

	// Everything else only needs RGB. PSMT8 is deliberately handled here because of channel shuffles.
	return (m_valid_rgb);
}
void GSTextureCache::Target::ResizeDrawn(const GSVector4i& rect)
@ -4997,35 +5254,29 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
}
/// Adds bits_written to the accumulated mask of valid bits; bits that are already set stay set.
void GSTextureCache::Target::UpdateValidBits(u32 bits_written)
{
	m_valid_bits = m_valid_bits | bits_written;
}
bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old)
{
if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height)
return true;
const int width = m_texture->GetWidth();
const int height = m_texture->GetHeight();
const int new_width = static_cast<int>(std::ceil(new_unscaled_width) * m_scale);
const int new_height = static_cast<int>(std::ceil(new_unscaled_height) * m_scale);
const bool clear = (new_width > width || new_height > height);
const GSVector2i size = m_texture->GetSize();
const GSVector2i new_unscaled_size = GSVector2i(new_unscaled_width, new_unscaled_height);
const GSVector2i new_size = ScaleRenderTargetSize(new_unscaled_size, m_scale);
const bool clear = (new_size.x > size.x || new_size.y > size.y);
GSTexture* tex = m_texture->IsDepthStencil() ?
g_gs_device->CreateDepthStencil(new_width, new_height, m_texture->GetFormat(), clear) :
g_gs_device->CreateRenderTarget(new_width, new_height, m_texture->GetFormat(), clear);
g_gs_device->CreateDepthStencil(new_size.x, new_size.y, m_texture->GetFormat(), clear) :
g_gs_device->CreateRenderTarget(new_size.x, new_size.y, m_texture->GetFormat(), clear);
if (!tex)
{
Console.Error("(ResizeTexture) Failed to allocate %dx%d texture from %dx%d texture", new_width, new_height, width, height);
Console.Error("(ResizeTexture) Failed to allocate %dx%d texture from %dx%d texture", size.x, size.y, new_size.x, new_size.y);
return false;
}
// Only need to copy if it's been written to.
if (m_texture->GetState() == GSTexture::State::Dirty)
{
const GSVector4i rc(0, 0, std::min(width, new_width), std::min(height, new_height));
const GSVector4i rc = GSVector4i::loadh(size.min(new_size));
if (tex->IsDepthStencil())
{
// Can't do partial copies in DirectX for depth textures, and it's probably not ideal in other
@ -5061,8 +5312,7 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca
delete m_texture;
m_texture = tex;
m_unscaled_size.x = new_unscaled_width;
m_unscaled_size.y = new_unscaled_height;
m_unscaled_size = new_unscaled_size;
return true;
}

View File

@ -222,7 +222,9 @@ public:
// Valid alpha means "we have rendered to the alpha channel of this target".
// A false value means that the alpha in local memory is still valid/up-to-date.
bool m_valid_alpha = false;
bool m_valid_alpha_low = false;
bool m_valid_alpha_high = false;
bool m_valid_rgb = false;
bool m_is_frame = false;
bool m_used = false;
@ -230,7 +232,6 @@ public:
GSDirtyRectList m_dirty;
GSVector4i m_valid{};
GSVector4i m_drawn_since_read{};
u32 m_valid_bits = 0;
int readbacks_since_draw = 0;
public:
@ -239,11 +240,13 @@ public:
static Target* Create(GIFRegTEX0 TEX0, int w, int h, float scale, int type, bool clear);
__fi bool HasValidAlpha() const { return (m_valid_alpha_low | m_valid_alpha_high); }
bool HasValidBitsForFormat(u32 psm) const;
void ResizeDrawn(const GSVector4i& rect);
void UpdateDrawn(const GSVector4i& rect, bool can_resize = true);
void ResizeValidity(const GSVector4i& rect);
void UpdateValidity(const GSVector4i& rect, bool can_resize = true);
void UpdateValidBits(u32 bits_written);
void Update();
@ -251,7 +254,7 @@ public:
void UpdateIfDirtyIntersects(const GSVector4i& rc);
/// Updates the valid channel flags (RGB, alpha low and alpha high), based on PSM and fbmsk.
void UpdateValidAlpha(u32 psm, u32 fbmsk);
void UpdateValidChannels(u32 psm, u32 fbmsk);
/// Resizes target texture, DOES NOT RESCALE.
bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true);
@ -419,6 +422,12 @@ protected:
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
void PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst);
// Returns scaled texture size.
static GSVector2i ScaleRenderTargetSize(const GSVector2i& sz, float scale);
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
/// plus the height is larger than the current size of the target.
void ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, int real_w, int real_h);
@ -454,7 +463,8 @@ public:
void ReadbackAll();
void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw, RGBAMask rgba, bool req_linear = false);
void ResizeTarget(Target* t, GSVector4i rect, u32 tbp, u32 psm, u32 tbw);
bool FullRectDirty(Target* target);
static bool FullRectDirty(Target* target, u32 rgba_mask);
static bool FullRectDirty(Target* target);
bool CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 dbp, u32 dpsm, u32 dbw);
GSVector4i TranslateAlignedRectByPage(Target* t, u32 sbp, u32 spsm, u32 sbw, GSVector4i src_r, bool is_invalidation = false);
void DirtyRectByPage(u32 sbp, u32 spsm, u32 sbw, Target* t, GSVector4i src_r);
@ -481,7 +491,8 @@ public:
GSVector2i GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width, s32 min_height);
bool Has32BitTarget(u32 bp);
void InvalidateVideoMemType(int type, u32 bp);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32);
void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r, bool full_flush = false);
@ -499,6 +510,9 @@ public:
/// Converts single depth value to colour using the specified shader expression.
static u32 ConvertDepthToColor(float d, ShaderConvert convert);
/// Copies RGB channels from depth target to a color target.
bool CopyRGBFromDepthToColor(Target* dst, Target* depth_src);
bool Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u32 DBW, u32 DPSM, int dx, int dy, int w, int h);
bool ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx, int dy, int w, int h);
bool PageMove(u32 SBP, u32 DBP, u32 BW, u32 PSM, int sx, int sy, int dx, int dy, int w, int h);

View File

@ -1117,14 +1117,23 @@ bool GSDeviceMTL::Create()
case ShaderConvert::RGBA_TO_8I: // Yes really
case ShaderConvert::TRANSPARENCY_FILTER:
case ShaderConvert::FLOAT32_TO_RGBA8:
case ShaderConvert::FLOAT32_TO_RGB8:
case ShaderConvert::FLOAT16_TO_RGB5A1:
case ShaderConvert::YUV:
pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color);
pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid;
break;
}
const u32 scmask = ShaderConvertWriteMask(conv);
MTLColorWriteMask mask = MTLColorWriteMaskNone;
if (scmask & 1) mask |= MTLColorWriteMaskRed;
if (scmask & 2) mask |= MTLColorWriteMaskGreen;
if (scmask & 4) mask |= MTLColorWriteMaskBlue;
if (scmask & 8) mask |= MTLColorWriteMaskAlpha;
pdesc.colorAttachments[0].writeMask = mask;
m_convert_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), name);
}
pdesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll;
pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid;
for (size_t i = 0; i < std::size(m_present_pipeline); i++)
{

View File

@ -1454,7 +1454,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
{
pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader));
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], linear);
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], false, OMColorMaskSelector(ShaderConvertWriteMask(shader)), linear);
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GLProgram& ps, bool linear)

View File

@ -2866,7 +2866,8 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
int(dRect.right - dRect.left), int(dRect.bottom - dRect.top));
DoStretchRect(static_cast<GSTextureVK*>(sTex), sRect, static_cast<GSTextureVK*>(dTex), dRect,
dTex ? m_convert[static_cast<int>(shader)] : m_present[static_cast<int>(shader)], linear, true);
dTex ? m_convert[static_cast<int>(shader)] : m_present[static_cast<int>(shader)], linear,
ShaderConvertWriteMask(shader) == 0xf);
}
void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red,
@ -3902,6 +3903,8 @@ bool GSDeviceVK::CompileConvertPipelines()
gpb.SetNoStencilState();
}
gpb.SetColorWriteMask(0, ShaderConvertWriteMask(i));
VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i));
if (ps == VK_NULL_HANDLE)
return false;

View File

@ -485,6 +485,14 @@ void Vulkan::GraphicsPipelineBuilder::SetBlendAttachment(u32 attachment, bool bl
}
}
/// Overwrites the colour write mask of the given blend attachment; no other field of the
/// attachment state is modified. `attachment` is asserted to be below MAX_ATTACHMENTS.
void Vulkan::GraphicsPipelineBuilder::SetColorWriteMask(u32 attachment, VkColorComponentFlags write_mask)
{
	pxAssert(attachment < MAX_ATTACHMENTS);

	m_blend_attachments[attachment].colorWriteMask = write_mask;
}
void Vulkan::GraphicsPipelineBuilder::AddBlendFlags(u32 flags)
{
m_blend_state.flags |= flags;

View File

@ -130,6 +130,9 @@ namespace Vulkan
VkBlendOp op, VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, VkBlendOp alpha_op,
VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
void SetColorWriteMask(
u32 attachment, VkColorComponentFlags write_mask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
void AddBlendFlags(u32 flags);
void ClearBlendAttachments();