GS/HW: Add support for complex offset shuffles

This commit is contained in:
refractionpcsx2 2023-08-03 01:26:30 +01:00
parent e9c342ef74
commit 5a3ba4e563
13 changed files with 381 additions and 179 deletions

View File

@ -950,6 +950,15 @@ void ps_main()
#if PS_SHUFFLE #if PS_SHUFFLE
uvec4 denorm_c = uvec4(C); uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
C.ga = vec2(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
C.rb = C.ga;
#else
C.ga = C.rg;
C.rb = C.ga;
#endif
#else
#if PS_READ16_SRC #if PS_READ16_SRC
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5))); C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if (bool(denorm_c.a & 0x80u)) if (bool(denorm_c.a & 0x80u))
@ -995,6 +1004,7 @@ void ps_main()
#endif // PS_READ_BA #endif // PS_READ_BA
#endif // READ16_SRC #endif // READ16_SRC
#endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE #endif // PS_SHUFFLE
// Must be done before alpha correction // Must be done before alpha correction

View File

@ -277,7 +277,9 @@ void main()
#define PS_TCOFFSETHACK 0 #define PS_TCOFFSETHACK 0
#define PS_POINT_SAMPLER 0 #define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0 #define PS_SHUFFLE 0
#define PS_SHUFFLE_SAME 0
#define PS_READ_BA 0 #define PS_READ_BA 0
#define PS_WRITE_RG 0
#define PS_READ16_SRC 0 #define PS_READ16_SRC 0
#define PS_DFMT 0 #define PS_DFMT 0
#define PS_DEPTH_FMT 0 #define PS_DEPTH_FMT 0
@ -1197,6 +1199,17 @@ void main()
#if PS_SHUFFLE #if PS_SHUFFLE
uvec4 denorm_c = uvec4(C); uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather.
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
C.ga = vec2(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
C.rb = C.ga;
#else
C.ga = C.rg;
C.rb = C.ga;
#endif
#else
#if PS_READ16_SRC #if PS_READ16_SRC
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5))); C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if ((denorm_c.a & 0x80u) != 0u) if ((denorm_c.a & 0x80u) != 0u)
@ -1224,6 +1237,7 @@ void main()
#endif #endif
#endif #endif
#endif #endif
#endif
// Must be done before alpha correction // Must be done before alpha correction

View File

@ -316,6 +316,7 @@ struct alignas(16) GSHWDrawConfig
u32 ltf : 1; u32 ltf : 1;
// Shuffle and fbmask effect // Shuffle and fbmask effect
u32 shuffle : 1; u32 shuffle : 1;
u32 shuffle_same : 1;
u32 real16src: 1; u32 real16src: 1;
u32 read_ba : 1; u32 read_ba : 1;
u32 write_rg : 1; u32 write_rg : 1;

View File

@ -40,6 +40,7 @@ protected:
GSVector2i m_real_size{0, 0}; GSVector2i m_real_size{0, 0};
bool m_texture_shuffle = false; bool m_texture_shuffle = false;
bool m_copy_16bit_to_target_shuffle = false; bool m_copy_16bit_to_target_shuffle = false;
bool m_same_group_texture_shuffle = false;
virtual GSTexture* GetOutput(int i, float& scale, int& y_offset) = 0; virtual GSTexture* GetOutput(int i, float& scale, int& y_offset) = 0;
virtual GSTexture* GetFeedbackOutput(float& scale) { return nullptr; } virtual GSTexture* GetFeedbackOutput(float& scale) { return nullptr; }

View File

@ -337,7 +337,7 @@ void GSRendererHW::ExpandLineIndices()
} }
// Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color // Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
{ {
const u32 count = m_vertex.next; const u32 count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
@ -351,7 +351,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
const float tw = static_cast<float>(1u << m_cached_ctx.TEX0.TW); const float tw = static_cast<float>(1u << m_cached_ctx.TEX0.TW);
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S); int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S);
tex_pos &= 0xFF; tex_pos &= 0xFF;
read_ba = (tex_pos > 112 && tex_pos < 144); // "same group" means it can read blue and write alpha using C32 tricks
read_ba = (tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF00000);
// Another way of selecting whether to read RG/BA is to use region repeat. // Another way of selecting whether to read RG/BA is to use region repeat.
// Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015. // Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015.
@ -406,60 +407,51 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
return; return;
} }
bool half_bottom = false; bool half_bottom_vert = true;
switch (GSConfig.UserHacks_HalfBottomOverride) bool half_right_vert = true;
{ bool half_bottom_uv = true;
case 0: bool half_right_uv = true;
// Force Disabled.
// Force Disabled will help games such as Xenosaga.
// Xenosaga handles the half bottom as an vertex offset instead of a buffer offset which does the effect twice.
// Half bottom won't trigger a cache miss that skip the draw because it is still the normal buffer but with a vertices offset.
half_bottom = false;
break;
case 1:
// Force Enabled.
// Force Enabled will help games such as Superman Shadows of Apokolips, The Lord of the Rings: The Two Towers,
// Demon Stone, Midnight Club 3.
half_bottom = true;
break;
case -1:
default:
// Default, Automatic.
// Here's the idea
// TS effect is 16 bits but we emulate it on a 32 bits format
// Normally this means we need to divide size by 2.
//
// Some games do two TS effects on each half of the buffer.
// This makes a mess for us in the TC because we end up with two targets
// when we only want one, thus half screen bug.
//
// 32bits emulation means we can do the effect once but double the size.
// Test cases: Crash Twinsantiy and DBZ BT3
// Test Case: NFS: HP2 splits the effect h:256 and h:192 so 64
// Other games: Midnight Club 3 headlights, black bar in Xenosaga 3 dialogue,
// Firefighter FD18 fire occlusion, PSI Ops half screen green overlay, Lord of the Rings - Two Towers,
// Demon Stone , Sonic Unleashed, Lord of the Rings Two Towers,
// Superman Shadow of Apokolips, Matrix Path of Neo, Big Mutha Truckers
int maxvert = 0; if (m_same_group_texture_shuffle)
int minvert = 4096;
for (u32 i = 0; i < count; i++)
{ {
int YCord = 0; if (m_cached_ctx.FRAME.FBW != rt->m_TEX0.TBW && m_cached_ctx.FRAME.FBW == rt->m_TEX0.TBW * 2)
half_right_vert = false;
if (!PRIM->FST)
YCord = static_cast<int>((1 << m_cached_ctx.TEX0.TH) * (v[i].ST.T / v[i].RGBAQ.Q));
else else
YCord = (v[i].V >> 4); half_bottom_vert = false;
}
if (maxvert < YCord) else
maxvert = YCord; {
if (minvert > YCord) // Different source (maybe?)
minvert = YCord; // If a game does the texture and frame doubling differently, they can burn in hell.
if (m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block())
{
// There is no single source of truth here since the width can get batted around; the target's valid rect is probably our best bet.
int tex_width = tex->m_target ? tex->m_from_target->m_valid.z : (tex->m_TEX0.TBW * 64);
int tex_tbw = tex->m_target ? tex->m_from_target_TEX0.TBW : tex->m_TEX0.TBW;
if (static_cast<int>(m_cached_ctx.TEX0.TBW * 64) >= std::min(tex_width * 2, 1024) && tex_tbw != m_cached_ctx.TEX0.TBW || (m_cached_ctx.TEX0.TBW * 64) < floor(m_vt.m_max.t.x))
{
half_right_uv = false;
half_right_vert = false;
}
else
{
half_bottom_uv = false;
half_bottom_vert = false;
}
}
else
{
if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)))
{
half_right_vert = false;
half_right_uv = false;
}
else
{
half_bottom_vert = false;
half_bottom_uv = false;
}
} }
half_bottom = minvert == 0 && m_r.height() <= maxvert;
break;
} }
if (PRIM->FST) if (PRIM->FST)
@ -478,7 +470,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
v[i + 1].U += 128u; v[i + 1].U += 128u;
if (!half_bottom) if (!half_bottom_vert)
{ {
// Height is too big (2x). // Height is too big (2x).
const int tex_offset = v[i].V & 0xF; const int tex_offset = v[i].V & 0xF;
@ -488,12 +480,16 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
tmp = GSVector4i(tmp - offset).srl32(1) + offset; tmp = GSVector4i(tmp - offset).srl32(1) + offset;
v[i].XYZ.Y = static_cast<u16>(tmp.x); v[i].XYZ.Y = static_cast<u16>(tmp.x);
v[i].V = static_cast<u16>(tmp.y);
v[i + 1].XYZ.Y = static_cast<u16>(tmp.z); v[i + 1].XYZ.Y = static_cast<u16>(tmp.z);
if (!half_bottom_uv)
{
v[i].V = static_cast<u16>(tmp.y);
v[i + 1].V = static_cast<u16>(tmp.w); v[i + 1].V = static_cast<u16>(tmp.w);
} }
} }
} }
}
else else
{ {
const float offset_8pix = 8.0f / tw; const float offset_8pix = 8.0f / tw;
@ -511,7 +507,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
v[i + 1].ST.S += offset_8pix; v[i + 1].ST.S += offset_8pix;
if (!half_bottom) if (!half_bottom_vert)
{ {
// Height is too big (2x). // Height is too big (2x).
const GSVector4i offset(o.OFY, o.OFY); const GSVector4i offset(o.OFY, o.OFY);
@ -521,12 +517,16 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
//fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y); //fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y);
v[i].XYZ.Y = static_cast<u16>(tmp.x); v[i].XYZ.Y = static_cast<u16>(tmp.x);
v[i].ST.T /= 2.0f;
v[i + 1].XYZ.Y = static_cast<u16>(tmp.y); v[i + 1].XYZ.Y = static_cast<u16>(tmp.y);
if (!half_bottom_uv)
{
v[i].ST.T /= 2.0f;
v[i + 1].ST.T /= 2.0f; v[i + 1].ST.T /= 2.0f;
} }
} }
} }
}
// Update vertex trace too. Avoid issue to compute bounding box // Update vertex trace too. Avoid issue to compute bounding box
if (write_ba) if (write_ba)
@ -534,21 +534,41 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
else else
m_vt.m_max.p.x += 8.0f; m_vt.m_max.p.x += 8.0f;
if (!half_bottom) if (!m_same_group_texture_shuffle)
{ {
const float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y;
m_vt.m_max.p.y -= delta_Y / 2.0f;
}
if (read_ba) if (read_ba)
m_vt.m_min.t.x -= 8.0f; m_vt.m_min.t.x -= 8.0f;
else else
m_vt.m_max.t.x += 8.0f; m_vt.m_max.t.x += 8.0f;
}
if (!half_bottom) if (!half_right_vert)
{ {
const float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y; m_vt.m_min.p.x /= 2.0f;
m_vt.m_max.t.y -= delta_T / 2.0f; m_vt.m_max.p.x /= 2.0f;
m_context->scissor.in.x = m_vt.m_min.p.x;
m_context->scissor.in.z = m_vt.m_max.p.x + 8.0f;
}
if (!half_bottom_vert)
{
m_vt.m_min.p.y /= 2.0f;
m_vt.m_max.p.y /= 2.0f;
m_context->scissor.in.y = m_vt.m_min.p.y;
m_context->scissor.in.w = m_vt.m_max.p.y + 8.0f;
}
// Only do this if the source is being interpreted as 16bit
if (!half_bottom_uv)
{
m_vt.m_min.t.y /= 2.0f;
m_vt.m_max.t.y /= 2.0f;
}
if (!half_right_uv)
{
m_vt.m_min.t.y /= 2.0f;
m_vt.m_max.t.y /= 2.0f;
} }
} }
@ -849,19 +869,22 @@ bool GSRendererHW::IsSplitTextureShuffle(u32 rt_tbw)
// If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up. // If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up.
// Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well. // Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well.
// "Potential" ones are for Jak3 which does a split shuffle on a 128x128 texture with a width of 256, writing to the lower half then offsetting 2 pages.
const u32 expected_next_FBP = (m_cached_ctx.FRAME.FBP + m_split_texture_shuffle_pages) + num_pages; const u32 expected_next_FBP = (m_cached_ctx.FRAME.FBP + m_split_texture_shuffle_pages) + num_pages;
const u32 potential_expected_next_FBP = m_cached_ctx.FRAME.FBP + ((m_context->FRAME.FBW * 64) / aligned_rc.width());
const u32 expected_next_TBP0 = (m_cached_ctx.TEX0.TBP0 + (m_split_texture_shuffle_pages + num_pages) * BLOCKS_PER_PAGE); const u32 expected_next_TBP0 = (m_cached_ctx.TEX0.TBP0 + (m_split_texture_shuffle_pages + num_pages) * BLOCKS_PER_PAGE);
const u32 potential_expected_next_TBP0 = m_cached_ctx.TEX0.TBP0 + (BLOCKS_PER_PAGE * ((m_context->TEX0.TBW * 64) / aligned_rc.width()));
GL_CACHE("IsSplitTextureShuffle: Draw covers %ux%u pages, next FRAME %x TEX %x", GL_CACHE("IsSplitTextureShuffle: Draw covers %ux%u pages, next FRAME %x TEX %x",
static_cast<u32>(aligned_rc.width()) / frame_psm.pgs.x, pages_high, expected_next_FBP * BLOCKS_PER_PAGE, static_cast<u32>(aligned_rc.width()) / frame_psm.pgs.x, pages_high, expected_next_FBP * BLOCKS_PER_PAGE,
expected_next_TBP0); expected_next_TBP0);
if (next_ctx.TEX0.TBP0 != expected_next_TBP0) if (next_ctx.TEX0.TBP0 != expected_next_TBP0 && next_ctx.TEX0.TBP0 != potential_expected_next_TBP0)
{ {
GL_CACHE("IsSplitTextureShuffle: Mismatch on TBP0, expecting %x, got %x", expected_next_TBP0, next_ctx.TEX0.TBP0); GL_CACHE("IsSplitTextureShuffle: Mismatch on TBP0, expecting %x, got %x", expected_next_TBP0, next_ctx.TEX0.TBP0);
return false; return false;
} }
// Some games don't offset the FBP. // Some games don't offset the FBP.
if (next_ctx.FRAME.FBP != expected_next_FBP && next_ctx.FRAME.FBP != m_cached_ctx.FRAME.FBP) if (next_ctx.FRAME.FBP != expected_next_FBP && next_ctx.FRAME.FBP != m_cached_ctx.FRAME.FBP && next_ctx.FRAME.FBP != potential_expected_next_FBP)
{ {
GL_CACHE("IsSplitTextureShuffle: Mismatch on FBP, expecting %x, got %x", expected_next_FBP * BLOCKS_PER_PAGE, GL_CACHE("IsSplitTextureShuffle: Mismatch on FBP, expecting %x, got %x", expected_next_FBP * BLOCKS_PER_PAGE,
next_ctx.FRAME.FBP * BLOCKS_PER_PAGE); next_ctx.FRAME.FBP * BLOCKS_PER_PAGE);
@ -1904,6 +1927,7 @@ void GSRendererHW::Draw()
m_texture_shuffle = false; m_texture_shuffle = false;
m_copy_16bit_to_target_shuffle = false; m_copy_16bit_to_target_shuffle = false;
m_same_group_texture_shuffle = false;
const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0); const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0);
if (is_split_texture_shuffle) if (is_split_texture_shuffle)
@ -2167,9 +2191,30 @@ void GSRendererHW::Draw()
GL_CACHE("Estimated texture region: %u,%u -> %u,%u", MIP_CLAMP.MINU, MIP_CLAMP.MINV, MIP_CLAMP.MAXU + 1, GL_CACHE("Estimated texture region: %u,%u -> %u,%u", MIP_CLAMP.MINU, MIP_CLAMP.MINV, MIP_CLAMP.MAXU + 1,
MIP_CLAMP.MAXV + 1); MIP_CLAMP.MAXV + 1);
} }
const bool possible_shuffle = m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 || IsPossibleChannelShuffle();
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear()) : GIFRegTEX0 FRAME_TEX0;
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, possible_shuffle, m_vt.IsLinear()); bool rt_32bit = false;
if (!no_rt && m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16)
{
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, true,
fm);
if (tgt)
rt_32bit = tgt->m_32_bits_fmt;
tgt = nullptr;
}
const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle();
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block()) :
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block());
if (unlikely(!src)) if (unlikely(!src))
{ {
GL_INS("ERROR: Source lookup failed, skipping."); GL_INS("ERROR: Source lookup failed, skipping.");
@ -2257,7 +2302,7 @@ void GSRendererHW::Draw()
} }
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::RenderTarget, true, rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_color, m_r); fm, false, force_preload, preserve_rt_color, m_r, src);
if (unlikely(!rt)) if (unlikely(!rt))
{ {
GL_INS("ERROR: Failed to create FRAME target, skipping."); GL_INS("ERROR: Failed to create FRAME target, skipping.");
@ -2281,7 +2326,7 @@ void GSRendererHW::Draw()
if (!ds) if (!ds)
{ {
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, m_r); m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, m_r, src);
if (unlikely(!ds)) if (unlikely(!ds))
{ {
GL_INS("ERROR: Failed to create ZBUF target, skipping."); GL_INS("ERROR: Failed to create ZBUF target, skipping.");
@ -2294,21 +2339,36 @@ void GSRendererHW::Draw()
if (process_texture) if (process_texture)
{ {
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r)+1;
const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() &&
src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) ||
(m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0));
if (rt) if (rt)
{ {
// copy of a 16bit source in to this target, make sure it's opaque and not bilinear to reduce false positives. // copy of a 16bit source in to this target, make sure it's opaque and not bilinear to reduce false positives.
m_copy_16bit_to_target_shuffle = m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque() m_copy_16bit_to_target_shuffle = m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque()
&& !(context->TEX1.MMIN & 1) && !src->m_32_bits_fmt && m_cached_ctx.FRAME.FBMSK; && !(context->TEX1.MMIN & 1) && !src->m_32_bits_fmt && m_cached_ctx.FRAME.FBMSK;
}
// It's not actually possible to do a C16->C16 texture shuffle of B to A as they are the same group
// However you can do it by using C32 and offsetting the target vertices to point to B A, then mask as appropriate.
m_same_group_texture_shuffle = draw_uses_target && (m_cached_ctx.TEX0.PSM & 0xE) == PSMCT32 && (m_cached_ctx.FRAME.PSM & 0x7) == PSMCT16 && (m_vt.m_min.p.x == 8.0f);
}
const GSVertex* v = &m_vertex.buff[0];
// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
// Initially code also tested the RT but it gives too much false-positive // Initially code also tested the RT but it gives too much false-positive
// const int first_x = (v[0].XYZ.X + 8) >> 4;
// Both input and output are 16 bits and texture was initially 32 bits! const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f);
m_texture_shuffle = (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) && (tex_psm.bpp == 16) const bool shuffle_coords = (first_x ^ first_u) & 8;
// Both input and output are 16 bits and texture was initially 32 bits! Same for the target: Sonic Unleashed makes a new target which really is 16bit.
m_texture_shuffle = ((m_same_group_texture_shuffle || (tex_psm.bpp == 16)) && (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) &&
(shuffle_coords || rt->m_32_bits_fmt))
&& draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle);
/* const bool old_shuffle = ((m_same_group_texture_shuffle || (tex_psm.bpp == 16)) && (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16))
&& draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle); && draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle);
if (old_shuffle && !m_texture_shuffle)
DevCon.Warning("Here draw %d", s_n);*/
// Okami mustn't call this code // Okami mustn't call this code
if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_cached_ctx.FRAME.FBMSK & fm_mask) == 0)) if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_cached_ctx.FRAME.FBMSK & fm_mask) == 0))
{ {
@ -2318,7 +2378,6 @@ void GSRendererHW::Draw()
// Shadow of Memories/Destiny shouldn't call this code. // Shadow of Memories/Destiny shouldn't call this code.
// Causes shadow flickering. // Causes shadow flickering.
const GSVertex* v = &m_vertex.buff[0];
m_texture_shuffle = ((v[1].U - v[0].U) < 256) || m_texture_shuffle = ((v[1].U - v[0].U) < 256) ||
// Tomb Raider Angel of Darkness relies on this behavior to produce a fog effect. // Tomb Raider Angel of Darkness relies on this behavior to produce a fog effect.
// In this case, the address of the framebuffer and texture are the same. // In this case, the address of the framebuffer and texture are the same.
@ -2509,9 +2568,23 @@ void GSRendererHW::Draw()
GSTextureCache::Target* old_rt = nullptr; GSTextureCache::Target* old_rt = nullptr;
GSTextureCache::Target* old_ds = nullptr; GSTextureCache::Target* old_ds = nullptr;
{ {
GSVector2i new_size = t_size;
// We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size.
if (rt && m_texture_shuffle && m_split_texture_shuffle_pages == 0)
{
if (new_size.x > rt->m_valid.z || new_size.y > rt->m_valid.w)
{
if (new_size.y <= rt->m_valid.w && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))
new_size.x /= 2;
else
new_size.y /= 2;
}
}
// We still need to make sure the dimensions of the targets match. // We still need to make sure the dimensions of the targets match.
const int new_w = std::max(t_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0));
const int new_h = std::max(t_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0));
if (rt) if (rt)
{ {
const u32 old_end_block = rt->m_end_block; const u32 old_end_block = rt->m_end_block;
@ -2522,6 +2595,7 @@ void GSRendererHW::Draw()
pxAssert(rt->GetScale() == target_scale); pxAssert(rt->GetScale() == target_scale);
if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h)
GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h);
rt->ResizeTexture(new_w, new_h); rt->ResizeTexture(new_w, new_h);
if (!m_texture_shuffle && !m_channel_shuffle) if (!m_texture_shuffle && !m_channel_shuffle)
@ -2531,8 +2605,8 @@ void GSRendererHW::Draw()
} }
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); rt->UpdateValidity(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
rt->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); rt->UpdateDrawn(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
// Probably changing to double buffering, so invalidate any old target that was next to it. // Probably changing to double buffering, so invalidate any old target that was next to it.
// This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing.
// Grandia Xtreme, Onimusha Warlord. // Grandia Xtreme, Onimusha Warlord.
@ -2735,7 +2809,7 @@ void GSRendererHW::Draw()
{ {
//rt->m_valid = rt->m_valid.runion(r); //rt->m_valid = rt->m_valid.runion(r);
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); rt->UpdateValidity(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false); g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false);
@ -2748,7 +2822,7 @@ void GSRendererHW::Draw()
{ {
//ds->m_valid = ds->m_valid.runion(r); //ds->m_valid = ds->m_valid.runion(r);
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2) && !m_texture_shuffle);
g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false); g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false);
@ -2996,7 +3070,7 @@ void GSRendererHW::EmulateZbuffer(const GSTextureCache::Target* ds)
} }
} }
void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt) void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex)
{ {
// Uncomment to disable texture shuffle emulation. // Uncomment to disable texture shuffle emulation.
// m_texture_shuffle = false; // m_texture_shuffle = false;
@ -3039,7 +3113,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
bool write_ba; bool write_ba;
bool read_ba; bool read_ba;
ConvertSpriteTextureShuffle(write_ba, read_ba); ConvertSpriteTextureShuffle(write_ba, read_ba, rt, tex);
// If date is enabled you need to test the green channel instead of the // If date is enabled you need to test the green channel instead of the
// alpha channel. Only enable this code in DATE mode to reduce the number // alpha channel. Only enable this code in DATE mode to reduce the number
@ -3048,6 +3122,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
m_conf.ps.read_ba = read_ba; m_conf.ps.read_ba = read_ba;
m_conf.ps.real16src = m_copy_16bit_to_target_shuffle; m_conf.ps.real16src = m_copy_16bit_to_target_shuffle;
m_conf.ps.shuffle_same = m_same_group_texture_shuffle;
// Please bang my head against the wall! // Please bang my head against the wall!
// 1/ Reduce the frame mask to a 16 bit format // 1/ Reduce the frame mask to a 16 bit format
const u32 m = m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk; const u32 m = m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;
@ -4101,7 +4176,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
// Force a 32 bits access (normally shuffle is done on 16 bits) // Force a 32 bits access (normally shuffle is done on 16 bits)
// m_ps_sel.tex_fmt = 0; // removed as an optimization // m_ps_sel.tex_fmt = 0; // removed as an optimization
m_conf.ps.aem = TEXA.AEM; m_conf.ps.aem = TEXA.AEM;
ASSERT(tex->m_target); //ASSERT(tex->m_target);
// Require a float conversion if the texure is a depth otherwise uses Integral scaling // Require a float conversion if the texure is a depth otherwise uses Integral scaling
if (psm.depth) if (psm.depth)
@ -4669,7 +4744,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_prim_overlap = PrimitiveOverlap(); m_prim_overlap = PrimitiveOverlap();
EmulateTextureShuffleAndFbmask(rt); EmulateTextureShuffleAndFbmask(rt, tex);
const GSDevice::FeatureSupport features = g_gs_device->Features(); const GSDevice::FeatureSupport features = g_gs_device->Features();

View File

@ -86,7 +86,7 @@ private:
void ResetStates(); void ResetStates();
void SetupIA(float target_scale, float sx, float sy); void SetupIA(float target_scale, float sx, float sy);
void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt); void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex);
bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only);
void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass); void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
@ -194,7 +194,7 @@ public:
void Lines2Sprites(); void Lines2Sprites();
bool VerifyIndices(); bool VerifyIndices();
void ExpandLineIndices(); void ExpandLineIndices();
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba); void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex); GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale); GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
void MergeSprite(GSTextureCache::Source* tex); void MergeSprite(GSTextureCache::Source* tex);

View File

@ -586,7 +586,7 @@ __ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, cons
return nullptr; return nullptr;
} }
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, bool palette) GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, const u32 frame_fbp, bool palette)
{ {
if (GSConfig.UserHacks_DisableDepthSupport) if (GSConfig.UserHacks_DisableDepthSupport)
{ {
@ -700,7 +700,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
{ {
for (Target* t : m_dst[RenderTarget]) for (Target* t : m_dst[RenderTarget])
{ {
if (t->m_age <= 1 && t->m_TEX0.TBP0 == bp && t->HasValidAlpha()) if (t->m_age <= 1 && t->m_TEX0.TBP0 == bp && t->m_TEX0.TBW == TEX0.TBW && t->HasValidAlpha())
{ {
GL_CACHE("TC depth: Using RT %x instead of depth because of missing alpha", t->m_TEX0.TBP0); GL_CACHE("TC depth: Using RT %x instead of depth because of missing alpha", t->m_TEX0.TBP0);
@ -776,7 +776,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
else else
{ {
// This is a bit of a worry, since it could load junk from local memory... but it's better than skipping the draw. // This is a bit of a worry, since it could load junk from local memory... but it's better than skipping the draw.
return LookupSource(TEX0, TEXA, CLAMP, r, nullptr, possible_shuffle, linear); return LookupSource(TEX0, TEXA, CLAMP, r, nullptr, possible_shuffle, linear, frame_fbp);
} }
ASSERT(src->m_texture); ASSERT(src->m_texture);
@ -785,7 +785,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
return src; return src;
} }
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear) GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear, const u32 frame_fbp)
{ {
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH); GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH);
@ -1031,15 +1031,38 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't.
// Also check BP >= TBP, create source isn't equipped to expand it backwards and all data comes from the target. (GH3) // Also check BP >= TBP, create source isn't equipped to expand it backwards and all data comes from the target. (GH3)
else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && psm >= PSMCT32 && else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && psm >= PSMCT32 &&
psm <= PSMCT16S && GSUtil::HasCompatibleBits(t->m_TEX0.PSM, psm) && (t->Overlaps(bp, bw, psm, r) || t->Wraps()) && psm <= PSMCT16S && (GSUtil::HasCompatibleBits(t->m_TEX0.PSM, psm) ||
(possible_shuffle && t->m_TEX0.PSM <= PSMCT24 && ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == bp))
&& (t->Overlaps(bp, bw, psm, r) || t->Wraps()) &&
t->m_age <= 1 && (!found_t || dst->m_TEX0.TBW < bw)) t->m_age <= 1 && (!found_t || dst->m_TEX0.TBW < bw))
{ {
// PSM equality needed because CreateSource does not handle PSM conversion. // PSM equality needed because CreateSource does not handle PSM conversion.
// Only inclusive hit to limit false hits. // Only inclusive hit to limit false hits.
GSVector4i rect = r;
int src_bw = bw;
int src_psm = psm;
// If the input is C16 and it's actually a shuffle of 32bits we need to correct the size.
if ((t->m_TEX0.PSM & 0xF) == PSMCT32 && (psm & 0x7) == PSMCT16 && possible_shuffle)
{
src_psm = t->m_TEX0.PSM;
// If it's taking double width for the shuffle, half that.
if (src_bw == (t->m_TEX0.TBW * 2))
{
src_bw = t->m_TEX0.TBW;
rect.x /= 2;
rect.z /= 2;
}
else
{
rect.y /= 2;
rect.w /= 2;
}
}
if (bp > t->m_TEX0.TBP0) if (bp > t->m_TEX0.TBP0)
{ {
GSVector4i new_rect = r; GSVector4i new_rect = rect;
if (linear) if (linear)
{ {
new_rect.z -= 1; new_rect.z -= 1;
@ -1052,10 +1075,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
const bool can_translate = CanTranslate(bp, bw, psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW); const bool can_translate = CanTranslate(bp, bw, psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW);
if (can_translate) if (can_translate)
{ {
const bool swizzle_match = GSLocalMemory::m_psm[psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth; const bool swizzle_match = GSLocalMemory::m_psm[src_psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
const GSVector2i& page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs; const GSVector2i& page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs;
const GSVector4i page_mask(GSVector4i((page_size.x - 1), (page_size.y - 1)).xyxy()); const GSVector4i page_mask(GSVector4i((page_size.x - 1), (page_size.y - 1)).xyxy());
GSVector4i rect = new_rect & ~page_mask; rect = new_rect & ~page_mask;
if (swizzle_match) if (swizzle_match)
{ {
@ -1068,18 +1091,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// If it's not page aligned, grab the whole pages it covers, to be safe. // If it's not page aligned, grab the whole pages it covers, to be safe.
if (GSLocalMemory::m_psm[psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) if (GSLocalMemory::m_psm[psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp)
{ {
const GSVector2i& dst_page_size = GSLocalMemory::m_psm[psm].pgs; const GSVector2i& dst_page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs;
rect = GSVector4i(rect.x / page_size.x, rect.y / page_size.y, (rect.z + (page_size.x - 1)) / page_size.x, (rect.w + (page_size.y - 1)) / page_size.y); new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y);
rect = GSVector4i(rect.x * dst_page_size.x, rect.y * dst_page_size.y, rect.z * dst_page_size.x, rect.w * dst_page_size.y); new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y);
} }
else else
{ {
rect.x &= ~(page_size.x - 1); new_rect.x &= ~(page_size.x - 1);
rect.y &= ~(page_size.y - 1); new_rect.y &= ~(page_size.y - 1);
rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1); new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1); new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
} }
rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, bw, rect); rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, bw, new_rect);
rect.x -= new_rect.x & ~(page_size.y - 1); rect.x -= new_rect.x & ~(page_size.y - 1);
rect.y -= new_rect.x & ~(page_size.y - 1); rect.y -= new_rect.x & ~(page_size.y - 1);
} }
@ -1107,11 +1130,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
dst = t; dst = t;
tex_merge_rt = false; tex_merge_rt = false;
found_t = true; found_t = true;
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
break;
else
continue; continue;
} }
else else
{ {
SurfaceOffset so = ComputeSurfaceOffset(bp, bw, psm, r, t); SurfaceOffset so = ComputeSurfaceOffset(bp, bw, psm, new_rect, t);
if (!so.is_valid && t->Wraps()) if (!so.is_valid && t->Wraps())
{ {
// Improves Beyond Good & Evil shadow. // Improves Beyond Good & Evil shadow.
@ -1127,14 +1153,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
tex_merge_rt = false; tex_merge_rt = false;
found_t = true; found_t = true;
// Keep looking, just in case there is an exact match (Situation: Target frame drawn inside target frame, current makes a separate texture) // Keep looking, just in case there is an exact match (Situation: Target frame drawn inside target frame, current makes a separate texture)
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
break;
else
continue; continue;
} }
} }
if (linear)
{
new_rect.z += 1;
new_rect.w += 1;
}
} }
else else
{ {
@ -1161,6 +1185,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
dst = t; dst = t;
tex_merge_rt = false; tex_merge_rt = false;
found_t = true; found_t = true;
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
break;
else
continue; continue;
} }
@ -1175,6 +1202,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Prefer a target inside over a target outside. // Prefer a target inside over a target outside.
found_t = false; found_t = false;
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
break;
else
continue; continue;
} }
} }
@ -1208,11 +1238,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
GIFRegTEX0 depth_TEX0; GIFRegTEX0 depth_TEX0;
depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
depth_TEX0.U32[1] = TEX0.U32[1]; depth_TEX0.U32[1] = TEX0.U32[1];
return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r, possible_shuffle, linear); return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r, possible_shuffle, linear, frame_fbp);
} }
else else
{ {
return LookupDepthSource(TEX0, TEXA, CLAMP, r, possible_shuffle, linear, true); return LookupDepthSource(TEX0, TEXA, CLAMP, r, possible_shuffle, linear, frame_fbp, true);
} }
} }
} }
@ -1512,7 +1542,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst_match = t; dst_match = t;
} }
} }
// We only want to use a matched target if it's actually being used.
if (dst_match) if (dst_match)
{ {
calcRescale(dst_match); calcRescale(dst_match);
@ -1537,6 +1567,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_valid_alpha_high = dst_match->m_valid_alpha_high && psm_s.trbpp != 24; dst->m_valid_alpha_high = dst_match->m_valid_alpha_high && psm_s.trbpp != 24;
dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_valid_rgb = dst_match->m_valid_rgb;
if(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16)
dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries).
ShaderConvert shader; ShaderConvert shader;
// m_32_bits_fmt gets set on a shuffle or if the format isn't 16bit. // m_32_bits_fmt gets set on a shuffle or if the format isn't 16bit.
// In this case it needs to make sure it isn't part of a shuffle, where it needs to be interpreted as 32bits. // In this case it needs to make sure it isn't part of a shuffle, where it needs to be interpreted as 32bits.
@ -1616,7 +1649,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
} }
GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, float scale, int type, GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect) bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect, GSTextureCache::Source* src)
{ {
if (type == DepthStencil) if (type == DepthStencil)
{ {
@ -1631,7 +1664,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true); Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true);
PreloadTarget(TEX0, size, valid_size, is_frame, preload, preserve_target, draw_rect, dst);; PreloadTarget(TEX0, size, valid_size, is_frame, preload, preserve_target, draw_rect, dst, src);
dst->m_is_frame = is_frame; dst->m_is_frame = is_frame;
@ -1654,7 +1687,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
} }
void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame, void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst) bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst, GSTextureCache::Source* src)
{ {
// In theory new textures contain invalidated data. Still in theory a new target // In theory new textures contain invalidated data. Still in theory a new target
// must contain the content of the GS memory. // must contain the content of the GS memory.
@ -1786,10 +1819,11 @@ void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
auto j = i; auto j = i;
Target* t = *j; Target* t = *j;
if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM)
if(t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid))
{
// could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half.
if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0)
t->m_TEX0.PSM == dst->m_TEX0.PSM &&
((((t->m_end_block + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0)
{ {
//DevCon.Warning("Found one %x->%x BW %d PSM %x (new target %x->%x BW %d PSM %x)", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW, t->m_TEX0.PSM, dst->m_TEX0.TBP0, dst->m_end_block, dst->m_TEX0.TBW, dst->m_TEX0.PSM); //DevCon.Warning("Found one %x->%x BW %d PSM %x (new target %x->%x BW %d PSM %x)", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW, t->m_TEX0.PSM, dst->m_TEX0.TBP0, dst->m_end_block, dst->m_TEX0.TBW, dst->m_TEX0.PSM);
GSVector4i new_valid = t->m_valid; GSVector4i new_valid = t->m_valid;
@ -1797,6 +1831,50 @@ void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
t->ResizeValidity(new_valid); t->ResizeValidity(new_valid);
return; return;
} }
// The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize.
else if (((((dst->UnwrappedEndBlock() + 1) - dst->m_TEX0.TBP0) >> 1) + dst->m_TEX0.TBP0) == t->m_TEX0.TBP0)
{
if (dst->m_TEX0.TBW == 2)
{
i++;
continue;
}
int overlapping_pages = ((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5;
int y_reduction = (overlapping_pages / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y;
if (y_reduction == 0 || (overlapping_pages % dst->m_TEX0.TBW))
{
i++;
continue;
}
const int copy_width = (t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth();
const int copy_height = y_reduction * t->m_scale;
const int old_height = (dst->m_valid.w - y_reduction) * dst->m_scale;
GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, old_height);
// Clear the dirty first
dst->Update();
// Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing.
g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), 0, old_height);
if (src && src->m_target && src->m_from_target == t)
{
// This should never happen as we're making a new target so the src should never be something it overlaps, but just in case.
GSVector4i new_valid = t->m_valid;
new_valid.y = std::max(new_valid.y - y_reduction, 0);
new_valid.w = std::max(new_valid.w - y_reduction, 0);
t->m_TEX0.TBP0 += (y_reduction / GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) << 5;
t->ResizeValidity(new_valid);
}
else
{
InvalidateSourcesFromTarget(t);
i = list.erase(j);
delete t;
}
return;
}
}
i++; i++;
} }
} }
@ -3702,6 +3780,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0)); src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0));
} }
} }
src->m_32_bits_fmt = dst->m_32_bits_fmt;
if (psm.pal > 0) if (psm.pal > 0)
{ {
@ -3762,6 +3841,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0)); src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0));
} }
} }
src->m_32_bits_fmt = dst->m_32_bits_fmt;
dst->Update(); dst->Update();

View File

@ -424,7 +424,7 @@ protected:
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region); Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
void PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame, void PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst); bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst, GSTextureCache::Source* src = nullptr);
// Returns scaled texture size. // Returns scaled texture size.
static GSVector2i ScaleRenderTargetSize(const GSVector2i& sz, float scale); static GSVector2i ScaleRenderTargetSize(const GSVector2i& sz, float scale);
@ -473,8 +473,8 @@ public:
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size); GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size);
std::shared_ptr<Palette> LookupPaletteObject(const u32* clut, u16 pal, bool need_gs_texture); std::shared_ptr<Palette> LookupPaletteObject(const u32* clut, u16 pal, bool need_gs_texture);
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear); Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear, const u32 frame_fbp = 0xFFFFFFFF);
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, bool palette = false); Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, const u32 frame_fbp = 0xFFFFFFFF, bool palette = false);
Target* FindTargetOverlap(Target* target, int type, int psm); Target* FindTargetOverlap(Target* target, int type, int psm);
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
@ -482,7 +482,7 @@ public:
const GSVector4i draw_rc = GSVector4i::zero()); const GSVector4i draw_rc = GSVector4i::zero());
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
const GSVector4i draw_rc = GSVector4i::zero()); const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr);
Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale); Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale);
/// Looks up a target in the cache, and only returns it if the BP/BW match exactly. /// Looks up a target in the cache, and only returns it if the BP/BW match exactly.

View File

@ -1803,6 +1803,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT); setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT);
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE); setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC); setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG); setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);

View File

@ -182,6 +182,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_ADJT, GSMTLConstantIndex_PS_ADJT,
GSMTLConstantIndex_PS_LTF, GSMTLConstantIndex_PS_LTF,
GSMTLConstantIndex_PS_SHUFFLE, GSMTLConstantIndex_PS_SHUFFLE,
GSMTLConstantIndex_PS_SHUFFLE_SAME,
GSMTLConstantIndex_PS_READ_BA, GSMTLConstantIndex_PS_READ_BA,
GSMTLConstantIndex_PS_READ16_SRC, GSMTLConstantIndex_PS_READ16_SRC,
GSMTLConstantIndex_PS_WRITE_RG, GSMTLConstantIndex_PS_WRITE_RG,

View File

@ -41,6 +41,7 @@ constant bool PS_ADJS [[function_constant(GSMTLConstantIndex_PS_AD
constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_ADJT)]]; constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_ADJT)]];
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]]; constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]]; constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
constant bool PS_SHUFFLE_SAME [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_SAME)]];
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]]; constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]]; constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]];
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]]; constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
@ -1021,6 +1022,21 @@ struct PSMain
uint4 denorm_c = uint4(C); uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f); uint2 denorm_TA = uint2(cb.ta * 255.5f);
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
{
C.ga = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
C.rb = C.ga;
}
else
{
C.ga = C.rg;
C.rb = C.ga;
}
}
else
{
if (PS_READ16_SRC) if (PS_READ16_SRC)
{ {
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5); C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
@ -1038,6 +1054,7 @@ struct PSMain
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
} }
} }
}
// Must be done before alpha correction // Must be done before alpha correction

View File

@ -1378,6 +1378,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_BLEND_D {}\n", sel.blend_d) + fmt::format("#define PS_BLEND_D {}\n", sel.blend_d)
+ fmt::format("#define PS_IIP {}\n", sel.iip) + fmt::format("#define PS_IIP {}\n", sel.iip)
+ fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle) + fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle)
+ fmt::format("#define PS_SHUFFLE_SAME {}\n", sel.shuffle_same)
+ fmt::format("#define PS_READ_BA {}\n", sel.read_ba) + fmt::format("#define PS_READ_BA {}\n", sel.read_ba)
+ fmt::format("#define PS_READ16_SRC {}\n", sel.real16src) + fmt::format("#define PS_READ16_SRC {}\n", sel.real16src)
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg) + fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)

View File

@ -4669,6 +4669,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_FIXED_ONE_A", sel.fixed_one_a); AddMacro(ss, "PS_FIXED_ONE_A", sel.fixed_one_a);
AddMacro(ss, "PS_IIP", sel.iip); AddMacro(ss, "PS_IIP", sel.iip);
AddMacro(ss, "PS_SHUFFLE", sel.shuffle); AddMacro(ss, "PS_SHUFFLE", sel.shuffle);
AddMacro(ss, "PS_SHUFFLE_SAME", sel.shuffle_same);
AddMacro(ss, "PS_READ_BA", sel.read_ba); AddMacro(ss, "PS_READ_BA", sel.read_ba);
AddMacro(ss, "PS_READ16_SRC", sel.real16src); AddMacro(ss, "PS_READ16_SRC", sel.real16src);
AddMacro(ss, "PS_WRITE_RG", sel.write_rg); AddMacro(ss, "PS_WRITE_RG", sel.write_rg);