mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Add support for complex offset shuffles
This commit is contained in:
parent
e9c342ef74
commit
5a3ba4e563
|
@ -950,6 +950,15 @@ void ps_main()
|
|||
#if PS_SHUFFLE
|
||||
uvec4 denorm_c = uvec4(C);
|
||||
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
|
||||
#if PS_SHUFFLE_SAME
|
||||
#if (PS_READ_BA)
|
||||
C.ga = vec2(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
|
||||
C.rb = C.ga;
|
||||
#else
|
||||
C.ga = C.rg;
|
||||
C.rb = C.ga;
|
||||
#endif
|
||||
#else
|
||||
#if PS_READ16_SRC
|
||||
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
|
||||
if (bool(denorm_c.a & 0x80u))
|
||||
|
@ -995,6 +1004,7 @@ void ps_main()
|
|||
#endif // PS_READ_BA
|
||||
|
||||
#endif // READ16_SRC
|
||||
#endif // PS_SHUFFLE_SAME
|
||||
#endif // PS_SHUFFLE
|
||||
|
||||
// Must be done before alpha correction
|
||||
|
|
|
@ -277,7 +277,9 @@ void main()
|
|||
#define PS_TCOFFSETHACK 0
|
||||
#define PS_POINT_SAMPLER 0
|
||||
#define PS_SHUFFLE 0
|
||||
#define PS_SHUFFLE_SAME 0
|
||||
#define PS_READ_BA 0
|
||||
#define PS_WRITE_RG 0
|
||||
#define PS_READ16_SRC 0
|
||||
#define PS_DFMT 0
|
||||
#define PS_DEPTH_FMT 0
|
||||
|
@ -1197,30 +1199,42 @@ void main()
|
|||
#if PS_SHUFFLE
|
||||
uvec4 denorm_c = uvec4(C);
|
||||
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
|
||||
#if PS_READ16_SRC
|
||||
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
// Mask will take care of the correct destination
|
||||
#if PS_READ_BA
|
||||
C.rb = C.bb;
|
||||
|
||||
// Special case for 32bit input and 16bit output, shuffle used by The Godfather.
|
||||
#if PS_SHUFFLE_SAME
|
||||
#if (PS_READ_BA)
|
||||
C.ga = vec2(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
|
||||
C.rb = C.ga;
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
C.ga = C.rg;
|
||||
C.rb = C.ga;
|
||||
#endif
|
||||
|
||||
#if PS_READ_BA
|
||||
#else
|
||||
#if PS_READ16_SRC
|
||||
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
// Mask will take care of the correct destination
|
||||
#if PS_READ_BA
|
||||
C.rb = C.bb;
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
#endif
|
||||
|
||||
#if PS_READ_BA
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -316,6 +316,7 @@ struct alignas(16) GSHWDrawConfig
|
|||
u32 ltf : 1;
|
||||
// Shuffle and fbmask effect
|
||||
u32 shuffle : 1;
|
||||
u32 shuffle_same : 1;
|
||||
u32 real16src: 1;
|
||||
u32 read_ba : 1;
|
||||
u32 write_rg : 1;
|
||||
|
|
|
@ -40,6 +40,7 @@ protected:
|
|||
GSVector2i m_real_size{0, 0};
|
||||
bool m_texture_shuffle = false;
|
||||
bool m_copy_16bit_to_target_shuffle = false;
|
||||
bool m_same_group_texture_shuffle = false;
|
||||
|
||||
virtual GSTexture* GetOutput(int i, float& scale, int& y_offset) = 0;
|
||||
virtual GSTexture* GetFeedbackOutput(float& scale) { return nullptr; }
|
||||
|
|
|
@ -337,7 +337,7 @@ void GSRendererHW::ExpandLineIndices()
|
|||
}
|
||||
|
||||
// Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color
|
||||
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
||||
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
|
||||
{
|
||||
const u32 count = m_vertex.next;
|
||||
GSVertex* v = &m_vertex.buff[0];
|
||||
|
@ -351,7 +351,8 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
const float tw = static_cast<float>(1u << m_cached_ctx.TEX0.TW);
|
||||
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S);
|
||||
tex_pos &= 0xFF;
|
||||
read_ba = (tex_pos > 112 && tex_pos < 144);
|
||||
// "same group" means it can read blue and write alpha using C32 tricks
|
||||
read_ba = (tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF00000);
|
||||
|
||||
// Another way of selecting whether to read RG/BA is to use region repeat.
|
||||
// Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015.
|
||||
|
@ -406,60 +407,51 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
return;
|
||||
}
|
||||
|
||||
bool half_bottom = false;
|
||||
switch (GSConfig.UserHacks_HalfBottomOverride)
|
||||
bool half_bottom_vert = true;
|
||||
bool half_right_vert = true;
|
||||
bool half_bottom_uv = true;
|
||||
bool half_right_uv = true;
|
||||
|
||||
if (m_same_group_texture_shuffle)
|
||||
{
|
||||
case 0:
|
||||
// Force Disabled.
|
||||
// Force Disabled will help games such as Xenosaga.
|
||||
// Xenosaga handles the half bottom as a vertex offset instead of a buffer offset which does the effect twice.
|
||||
// Half bottom won't trigger a cache miss that skip the draw because it is still the normal buffer but with a vertices offset.
|
||||
half_bottom = false;
|
||||
break;
|
||||
case 1:
|
||||
// Force Enabled.
|
||||
// Force Enabled will help games such as Superman Shadows of Apokolips, The Lord of the Rings: The Two Towers,
|
||||
// Demon Stone, Midnight Club 3.
|
||||
half_bottom = true;
|
||||
break;
|
||||
case -1:
|
||||
default:
|
||||
// Default, Automatic.
|
||||
// Here's the idea
|
||||
// TS effect is 16 bits but we emulate it on a 32 bits format
|
||||
// Normally this means we need to divide size by 2.
|
||||
//
|
||||
// Some games do two TS effects on each half of the buffer.
|
||||
// This makes a mess for us in the TC because we end up with two targets
|
||||
// when we only want one, thus half screen bug.
|
||||
//
|
||||
// 32bits emulation means we can do the effect once but double the size.
|
||||
// Test cases: Crash Twinsanity and DBZ BT3
|
||||
// Test Case: NFS: HP2 splits the effect h:256 and h:192 so 64
|
||||
// Other games: Midnight Club 3 headlights, black bar in Xenosaga 3 dialogue,
|
||||
// Firefighter FD18 fire occlusion, PSI Ops half screen green overlay, Lord of the Rings - Two Towers,
|
||||
// Demon Stone , Sonic Unleashed, Lord of the Rings Two Towers,
|
||||
// Superman Shadow of Apokolips, Matrix Path of Neo, Big Mutha Truckers
|
||||
|
||||
int maxvert = 0;
|
||||
int minvert = 4096;
|
||||
for (u32 i = 0; i < count; i++)
|
||||
if (m_cached_ctx.FRAME.FBW != rt->m_TEX0.TBW && m_cached_ctx.FRAME.FBW == rt->m_TEX0.TBW * 2)
|
||||
half_right_vert = false;
|
||||
else
|
||||
half_bottom_vert = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Different source (maybe?)
|
||||
// If a game does the texture and frame doubling differently, they can burn in hell.
|
||||
if (m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block())
|
||||
{
|
||||
// No super source of truth here, since the width can get batted around, the valid is probably our best bet.
|
||||
int tex_width = tex->m_target ? tex->m_from_target->m_valid.z : (tex->m_TEX0.TBW * 64);
|
||||
int tex_tbw = tex->m_target ? tex->m_from_target_TEX0.TBW : tex->m_TEX0.TBW;
|
||||
if (static_cast<int>(m_cached_ctx.TEX0.TBW * 64) >= std::min(tex_width * 2, 1024) && tex_tbw != m_cached_ctx.TEX0.TBW || (m_cached_ctx.TEX0.TBW * 64) < floor(m_vt.m_max.t.x))
|
||||
{
|
||||
int YCord = 0;
|
||||
|
||||
if (!PRIM->FST)
|
||||
YCord = static_cast<int>((1 << m_cached_ctx.TEX0.TH) * (v[i].ST.T / v[i].RGBAQ.Q));
|
||||
else
|
||||
YCord = (v[i].V >> 4);
|
||||
|
||||
if (maxvert < YCord)
|
||||
maxvert = YCord;
|
||||
if (minvert > YCord)
|
||||
minvert = YCord;
|
||||
half_right_uv = false;
|
||||
half_right_vert = false;
|
||||
}
|
||||
|
||||
half_bottom = minvert == 0 && m_r.height() <= maxvert;
|
||||
break;
|
||||
else
|
||||
{
|
||||
half_bottom_uv = false;
|
||||
half_bottom_vert = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)))
|
||||
{
|
||||
half_right_vert = false;
|
||||
half_right_uv = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
half_bottom_vert = false;
|
||||
half_bottom_uv = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (PRIM->FST)
|
||||
|
@ -469,16 +461,16 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
for (u32 i = 0; i < count; i += 2)
|
||||
{
|
||||
if (write_ba)
|
||||
v[i].XYZ.X -= 128u;
|
||||
v[i].XYZ.X -= 128u;
|
||||
else
|
||||
v[i+1].XYZ.X += 128u;
|
||||
v[i + 1].XYZ.X += 128u;
|
||||
|
||||
if (read_ba)
|
||||
v[i].U -= 128u;
|
||||
v[i].U -= 128u;
|
||||
else
|
||||
v[i+1].U += 128u;
|
||||
v[i + 1].U += 128u;
|
||||
|
||||
if (!half_bottom)
|
||||
if (!half_bottom_vert)
|
||||
{
|
||||
// Height is too big (2x).
|
||||
const int tex_offset = v[i].V & 0xF;
|
||||
|
@ -488,9 +480,13 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
||||
|
||||
v[i].XYZ.Y = static_cast<u16>(tmp.x);
|
||||
v[i].V = static_cast<u16>(tmp.y);
|
||||
v[i + 1].XYZ.Y = static_cast<u16>(tmp.z);
|
||||
v[i + 1].V = static_cast<u16>(tmp.w);
|
||||
|
||||
if (!half_bottom_uv)
|
||||
{
|
||||
v[i].V = static_cast<u16>(tmp.y);
|
||||
v[i + 1].V = static_cast<u16>(tmp.w);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -502,16 +498,16 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
for (u32 i = 0; i < count; i += 2)
|
||||
{
|
||||
if (write_ba)
|
||||
v[i].XYZ.X -= 128u;
|
||||
v[i].XYZ.X -= 128u;
|
||||
else
|
||||
v[i+1].XYZ.X += 128u;
|
||||
v[i + 1].XYZ.X += 128u;
|
||||
|
||||
if (read_ba)
|
||||
v[i].ST.S -= offset_8pix;
|
||||
v[i].ST.S -= offset_8pix;
|
||||
else
|
||||
v[i+1].ST.S += offset_8pix;
|
||||
v[i + 1].ST.S += offset_8pix;
|
||||
|
||||
if (!half_bottom)
|
||||
if (!half_bottom_vert)
|
||||
{
|
||||
// Height is too big (2x).
|
||||
const GSVector4i offset(o.OFY, o.OFY);
|
||||
|
@ -521,9 +517,13 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
|
||||
//fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y);
|
||||
v[i].XYZ.Y = static_cast<u16>(tmp.x);
|
||||
v[i].ST.T /= 2.0f;
|
||||
v[i + 1].XYZ.Y = static_cast<u16>(tmp.y);
|
||||
v[i + 1].ST.T /= 2.0f;
|
||||
|
||||
if (!half_bottom_uv)
|
||||
{
|
||||
v[i].ST.T /= 2.0f;
|
||||
v[i + 1].ST.T /= 2.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -534,21 +534,41 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
|||
else
|
||||
m_vt.m_max.p.x += 8.0f;
|
||||
|
||||
if (!half_bottom)
|
||||
if (!m_same_group_texture_shuffle)
|
||||
{
|
||||
const float delta_Y = m_vt.m_max.p.y - m_vt.m_min.p.y;
|
||||
m_vt.m_max.p.y -= delta_Y / 2.0f;
|
||||
if (read_ba)
|
||||
m_vt.m_min.t.x -= 8.0f;
|
||||
else
|
||||
m_vt.m_max.t.x += 8.0f;
|
||||
}
|
||||
|
||||
if (read_ba)
|
||||
m_vt.m_min.t.x -= 8.0f;
|
||||
else
|
||||
m_vt.m_max.t.x += 8.0f;
|
||||
|
||||
if (!half_bottom)
|
||||
if (!half_right_vert)
|
||||
{
|
||||
const float delta_T = m_vt.m_max.t.y - m_vt.m_min.t.y;
|
||||
m_vt.m_max.t.y -= delta_T / 2.0f;
|
||||
m_vt.m_min.p.x /= 2.0f;
|
||||
m_vt.m_max.p.x /= 2.0f;
|
||||
m_context->scissor.in.x = m_vt.m_min.p.x;
|
||||
m_context->scissor.in.z = m_vt.m_max.p.x + 8.0f;
|
||||
}
|
||||
|
||||
if (!half_bottom_vert)
|
||||
{
|
||||
m_vt.m_min.p.y /= 2.0f;
|
||||
m_vt.m_max.p.y /= 2.0f;
|
||||
m_context->scissor.in.y = m_vt.m_min.p.y;
|
||||
m_context->scissor.in.w = m_vt.m_max.p.y + 8.0f;
|
||||
}
|
||||
|
||||
// Only do this if the source is being interpreted as 16bit
|
||||
if (!half_bottom_uv)
|
||||
{
|
||||
m_vt.m_min.t.y /= 2.0f;
|
||||
m_vt.m_max.t.y /= 2.0f;
|
||||
}
|
||||
|
||||
if (!half_right_uv)
|
||||
{
|
||||
m_vt.m_min.t.y /= 2.0f;
|
||||
m_vt.m_max.t.y /= 2.0f;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -849,19 +869,22 @@ bool GSRendererHW::IsSplitTextureShuffle(u32 rt_tbw)
|
|||
|
||||
// If this is a split texture shuffle, the next draw's FRAME/TEX0 should line up.
|
||||
// Re-add the offset we subtracted in Draw() to get the original FBP/TBP0.. this won't handle wrapping. Oh well.
|
||||
// "Potential" ones are for Jak3 which does a split shuffle on a 128x128 texture with a width of 256, writing to the lower half then offsetting 2 pages.
|
||||
const u32 expected_next_FBP = (m_cached_ctx.FRAME.FBP + m_split_texture_shuffle_pages) + num_pages;
|
||||
const u32 potential_expected_next_FBP = m_cached_ctx.FRAME.FBP + ((m_context->FRAME.FBW * 64) / aligned_rc.width());
|
||||
const u32 expected_next_TBP0 = (m_cached_ctx.TEX0.TBP0 + (m_split_texture_shuffle_pages + num_pages) * BLOCKS_PER_PAGE);
|
||||
const u32 potential_expected_next_TBP0 = m_cached_ctx.TEX0.TBP0 + (BLOCKS_PER_PAGE * ((m_context->TEX0.TBW * 64) / aligned_rc.width()));
|
||||
GL_CACHE("IsSplitTextureShuffle: Draw covers %ux%u pages, next FRAME %x TEX %x",
|
||||
static_cast<u32>(aligned_rc.width()) / frame_psm.pgs.x, pages_high, expected_next_FBP * BLOCKS_PER_PAGE,
|
||||
expected_next_TBP0);
|
||||
if (next_ctx.TEX0.TBP0 != expected_next_TBP0)
|
||||
if (next_ctx.TEX0.TBP0 != expected_next_TBP0 && next_ctx.TEX0.TBP0 != potential_expected_next_TBP0)
|
||||
{
|
||||
GL_CACHE("IsSplitTextureShuffle: Mismatch on TBP0, expecting %x, got %x", expected_next_TBP0, next_ctx.TEX0.TBP0);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Some games don't offset the FBP.
|
||||
if (next_ctx.FRAME.FBP != expected_next_FBP && next_ctx.FRAME.FBP != m_cached_ctx.FRAME.FBP)
|
||||
if (next_ctx.FRAME.FBP != expected_next_FBP && next_ctx.FRAME.FBP != m_cached_ctx.FRAME.FBP && next_ctx.FRAME.FBP != potential_expected_next_FBP)
|
||||
{
|
||||
GL_CACHE("IsSplitTextureShuffle: Mismatch on FBP, expecting %x, got %x", expected_next_FBP * BLOCKS_PER_PAGE,
|
||||
next_ctx.FRAME.FBP * BLOCKS_PER_PAGE);
|
||||
|
@ -1904,6 +1927,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
m_texture_shuffle = false;
|
||||
m_copy_16bit_to_target_shuffle = false;
|
||||
m_same_group_texture_shuffle = false;
|
||||
|
||||
const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0);
|
||||
if (is_split_texture_shuffle)
|
||||
|
@ -2167,9 +2191,30 @@ void GSRendererHW::Draw()
|
|||
GL_CACHE("Estimated texture region: %u,%u -> %u,%u", MIP_CLAMP.MINU, MIP_CLAMP.MINV, MIP_CLAMP.MAXU + 1,
|
||||
MIP_CLAMP.MAXV + 1);
|
||||
}
|
||||
const bool possible_shuffle = m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 || IsPossibleChannelShuffle();
|
||||
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear()) :
|
||||
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, possible_shuffle, m_vt.IsLinear());
|
||||
|
||||
GIFRegTEX0 FRAME_TEX0;
|
||||
bool rt_32bit = false;
|
||||
if (!no_rt && m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16)
|
||||
{
|
||||
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
|
||||
FRAME_TEX0.U64 = 0;
|
||||
FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block();
|
||||
FRAME_TEX0.TBW = m_cached_ctx.FRAME.FBW;
|
||||
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
|
||||
|
||||
GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, true,
|
||||
fm);
|
||||
|
||||
if (tgt)
|
||||
rt_32bit = tgt->m_32_bits_fmt;
|
||||
|
||||
tgt = nullptr;
|
||||
}
|
||||
const bool possible_shuffle = ((rt_32bit && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0) || IsPossibleChannelShuffle();
|
||||
|
||||
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block()) :
|
||||
g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block());
|
||||
|
||||
if (unlikely(!src))
|
||||
{
|
||||
GL_INS("ERROR: Source lookup failed, skipping.");
|
||||
|
@ -2257,7 +2302,7 @@ void GSRendererHW::Draw()
|
|||
}
|
||||
|
||||
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::RenderTarget, true,
|
||||
fm, false, force_preload, preserve_rt_color, m_r);
|
||||
fm, false, force_preload, preserve_rt_color, m_r, src);
|
||||
if (unlikely(!rt))
|
||||
{
|
||||
GL_INS("ERROR: Failed to create FRAME target, skipping.");
|
||||
|
@ -2281,7 +2326,7 @@ void GSRendererHW::Draw()
|
|||
if (!ds)
|
||||
{
|
||||
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
|
||||
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, m_r);
|
||||
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, m_r, src);
|
||||
if (unlikely(!ds))
|
||||
{
|
||||
GL_INS("ERROR: Failed to create ZBUF target, skipping.");
|
||||
|
@ -2294,21 +2339,36 @@ void GSRendererHW::Draw()
|
|||
if (process_texture)
|
||||
{
|
||||
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
|
||||
|
||||
const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r)+1;
|
||||
const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() &&
|
||||
src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) ||
|
||||
(m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0));
|
||||
|
||||
if (rt)
|
||||
{
|
||||
// copy of a 16bit source in to this target, make sure it's opaque and not bilinear to reduce false positives.
|
||||
m_copy_16bit_to_target_shuffle = m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque()
|
||||
&& !(context->TEX1.MMIN & 1) && !src->m_32_bits_fmt && m_cached_ctx.FRAME.FBMSK;
|
||||
}
|
||||
|
||||
// It's not actually possible to do a C16->C16 texture shuffle of B to A as they are the same group
|
||||
// However you can do it by using C32 and offsetting the target vertices to point to B A, then mask as appropriate.
|
||||
m_same_group_texture_shuffle = draw_uses_target && (m_cached_ctx.TEX0.PSM & 0xE) == PSMCT32 && (m_cached_ctx.FRAME.PSM & 0x7) == PSMCT16 && (m_vt.m_min.p.x == 8.0f);
|
||||
}
|
||||
const GSVertex* v = &m_vertex.buff[0];
|
||||
// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
|
||||
// Initially code also tested the RT but it gives too much false-positive
|
||||
//
|
||||
// Both input and output are 16 bits and texture was initially 32 bits!
|
||||
m_texture_shuffle = (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) && (tex_psm.bpp == 16)
|
||||
const int first_x = (v[0].XYZ.X + 8) >> 4;
|
||||
const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f);
|
||||
const bool shuffle_coords = (first_x ^ first_u) & 8;
|
||||
// Both input and output are 16 bits and texture was initially 32 bits! Same for the target, Sonic Unleashed makes a new target which really is 16bit.
|
||||
m_texture_shuffle = ((m_same_group_texture_shuffle || (tex_psm.bpp == 16)) && (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) &&
|
||||
(shuffle_coords || rt->m_32_bits_fmt))
|
||||
&& draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle);
|
||||
/* const bool old_shuffle = ((m_same_group_texture_shuffle || (tex_psm.bpp == 16)) && (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16))
|
||||
&& draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle);
|
||||
|
||||
if (old_shuffle && !m_texture_shuffle)
|
||||
DevCon.Warning("Here draw %d", s_n);*/
|
||||
// Okami mustn't call this code
|
||||
if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_cached_ctx.FRAME.FBMSK & fm_mask) == 0))
|
||||
{
|
||||
|
@ -2318,7 +2378,6 @@ void GSRendererHW::Draw()
|
|||
|
||||
// Shadow of Memories/Destiny shouldn't call this code.
|
||||
// Causes shadow flickering.
|
||||
const GSVertex* v = &m_vertex.buff[0];
|
||||
m_texture_shuffle = ((v[1].U - v[0].U) < 256) ||
|
||||
// Tomb Raider Angel of Darkness relies on this behavior to produce a fog effect.
|
||||
// In this case, the address of the framebuffer and texture are the same.
|
||||
|
@ -2509,9 +2568,23 @@ void GSRendererHW::Draw()
|
|||
GSTextureCache::Target* old_rt = nullptr;
|
||||
GSTextureCache::Target* old_ds = nullptr;
|
||||
{
|
||||
GSVector2i new_size = t_size;
|
||||
|
||||
// We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size.
|
||||
if (rt && m_texture_shuffle && m_split_texture_shuffle_pages == 0)
|
||||
{
|
||||
if (new_size.x > rt->m_valid.z || new_size.y > rt->m_valid.w)
|
||||
{
|
||||
if (new_size.y <= rt->m_valid.w && (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))
|
||||
new_size.x /= 2;
|
||||
else
|
||||
new_size.y /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// We still need to make sure the dimensions of the targets match.
|
||||
const int new_w = std::max(t_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0));
|
||||
const int new_h = std::max(t_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0));
|
||||
const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0));
|
||||
const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0));
|
||||
if (rt)
|
||||
{
|
||||
const u32 old_end_block = rt->m_end_block;
|
||||
|
@ -2522,6 +2595,7 @@ void GSRendererHW::Draw()
|
|||
pxAssert(rt->GetScale() == target_scale);
|
||||
if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h)
|
||||
GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h);
|
||||
|
||||
rt->ResizeTexture(new_w, new_h);
|
||||
|
||||
if (!m_texture_shuffle && !m_channel_shuffle)
|
||||
|
@ -2531,8 +2605,8 @@ void GSRendererHW::Draw()
|
|||
}
|
||||
|
||||
// Limit to 2x the vertical height of the resolution (for double buffering)
|
||||
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
|
||||
rt->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2));
|
||||
rt->UpdateValidity(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
|
||||
rt->UpdateDrawn(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
|
||||
// Probably changing to double buffering, so invalidate any old target that was next to it.
|
||||
// This resolves an issue where the PCRTC will find the old target in FMV's causing flashing.
|
||||
// Grandia Xtreme, Onimusha Warlord.
|
||||
|
@ -2735,7 +2809,7 @@ void GSRendererHW::Draw()
|
|||
{
|
||||
//rt->m_valid = rt->m_valid.runion(r);
|
||||
// Limit to 2x the vertical height of the resolution (for double buffering)
|
||||
rt->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
|
||||
rt->UpdateValidity(m_r, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
|
||||
|
||||
g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false);
|
||||
|
||||
|
@ -2748,7 +2822,7 @@ void GSRendererHW::Draw()
|
|||
{
|
||||
//ds->m_valid = ds->m_valid.runion(r);
|
||||
// Limit to 2x the vertical height of the resolution (for double buffering)
|
||||
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2));
|
||||
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2) && !m_texture_shuffle);
|
||||
|
||||
g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false);
|
||||
|
||||
|
@ -2996,7 +3070,7 @@ void GSRendererHW::EmulateZbuffer(const GSTextureCache::Target* ds)
|
|||
}
|
||||
}
|
||||
|
||||
void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
|
||||
void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex)
|
||||
{
|
||||
// Uncomment to disable texture shuffle emulation.
|
||||
// m_texture_shuffle = false;
|
||||
|
@ -3039,7 +3113,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
|
|||
bool write_ba;
|
||||
bool read_ba;
|
||||
|
||||
ConvertSpriteTextureShuffle(write_ba, read_ba);
|
||||
ConvertSpriteTextureShuffle(write_ba, read_ba, rt, tex);
|
||||
|
||||
// If date is enabled you need to test the green channel instead of the
|
||||
// alpha channel. Only enable this code in DATE mode to reduce the number
|
||||
|
@ -3048,6 +3122,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt)
|
|||
|
||||
m_conf.ps.read_ba = read_ba;
|
||||
m_conf.ps.real16src = m_copy_16bit_to_target_shuffle;
|
||||
m_conf.ps.shuffle_same = m_same_group_texture_shuffle;
|
||||
// Please bang my head against the wall!
|
||||
// 1/ Reduce the frame mask to a 16 bit format
|
||||
const u32 m = m_cached_ctx.FRAME.FBMSK & GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].fmsk;
|
||||
|
@ -4101,7 +4176,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
|
|||
// Force a 32 bits access (normally shuffle is done on 16 bits)
|
||||
// m_ps_sel.tex_fmt = 0; // removed as an optimization
|
||||
m_conf.ps.aem = TEXA.AEM;
|
||||
ASSERT(tex->m_target);
|
||||
//ASSERT(tex->m_target);
|
||||
|
||||
// Require a float conversion if the texture is a depth otherwise uses Integral scaling
|
||||
if (psm.depth)
|
||||
|
@ -4669,7 +4744,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||
|
||||
m_prim_overlap = PrimitiveOverlap();
|
||||
|
||||
EmulateTextureShuffleAndFbmask(rt);
|
||||
EmulateTextureShuffleAndFbmask(rt, tex);
|
||||
|
||||
const GSDevice::FeatureSupport features = g_gs_device->Features();
|
||||
|
||||
|
|
|
@ -86,7 +86,7 @@ private:
|
|||
|
||||
void ResetStates();
|
||||
void SetupIA(float target_scale, float sx, float sy);
|
||||
void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt);
|
||||
void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
||||
bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only);
|
||||
void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
|
||||
|
||||
|
@ -194,7 +194,7 @@ public:
|
|||
void Lines2Sprites();
|
||||
bool VerifyIndices();
|
||||
void ExpandLineIndices();
|
||||
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
|
||||
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
||||
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
||||
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
|
||||
void MergeSprite(GSTextureCache::Source* tex);
|
||||
|
|
|
@ -586,7 +586,7 @@ __ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, cons
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, bool palette)
|
||||
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, const u32 frame_fbp, bool palette)
|
||||
{
|
||||
if (GSConfig.UserHacks_DisableDepthSupport)
|
||||
{
|
||||
|
@ -700,7 +700,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
{
|
||||
for (Target* t : m_dst[RenderTarget])
|
||||
{
|
||||
if (t->m_age <= 1 && t->m_TEX0.TBP0 == bp && t->HasValidAlpha())
|
||||
if (t->m_age <= 1 && t->m_TEX0.TBP0 == bp && t->m_TEX0.TBW == TEX0.TBW && t->HasValidAlpha())
|
||||
{
|
||||
GL_CACHE("TC depth: Using RT %x instead of depth because of missing alpha", t->m_TEX0.TBP0);
|
||||
|
||||
|
@ -776,7 +776,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
else
|
||||
{
|
||||
// This is a bit of a worry, since it could load junk from local memory... but it's better than skipping the draw.
|
||||
return LookupSource(TEX0, TEXA, CLAMP, r, nullptr, possible_shuffle, linear);
|
||||
return LookupSource(TEX0, TEXA, CLAMP, r, nullptr, possible_shuffle, linear, frame_fbp);
|
||||
}
|
||||
|
||||
ASSERT(src->m_texture);
|
||||
|
@ -785,7 +785,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
return src;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear)
|
||||
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear, const u32 frame_fbp)
|
||||
{
|
||||
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH);
|
||||
|
||||
|
@ -1031,15 +1031,38 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
// Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't.
|
||||
// Also check BP >= TBP, create source isn't equipped to expand it backwards and all data comes from the target. (GH3)
|
||||
else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && psm >= PSMCT32 &&
|
||||
psm <= PSMCT16S && GSUtil::HasCompatibleBits(t->m_TEX0.PSM, psm) && (t->Overlaps(bp, bw, psm, r) || t->Wraps()) &&
|
||||
t->m_age <= 1 && (!found_t || dst->m_TEX0.TBW < bw))
|
||||
psm <= PSMCT16S && (GSUtil::HasCompatibleBits(t->m_TEX0.PSM, psm) ||
|
||||
(possible_shuffle && t->m_TEX0.PSM <= PSMCT24 && ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == bp))
|
||||
&& (t->Overlaps(bp, bw, psm, r) || t->Wraps()) &&
|
||||
t->m_age <= 1 && (!found_t || dst->m_TEX0.TBW < bw))
|
||||
{
|
||||
// PSM equality needed because CreateSource does not handle PSM conversion.
|
||||
// Only inclusive hit to limit false hits.
|
||||
GSVector4i rect = r;
|
||||
int src_bw = bw;
|
||||
int src_psm = psm;
|
||||
|
||||
// If the input is C16 and it's actually a shuffle of 32bits we need to correct the size.
|
||||
if ((t->m_TEX0.PSM & 0xF) == PSMCT32 && (psm & 0x7) == PSMCT16 && possible_shuffle)
|
||||
{
|
||||
src_psm = t->m_TEX0.PSM;
|
||||
// If it's taking double width for the shuffle, half that.
|
||||
if (src_bw == (t->m_TEX0.TBW * 2))
|
||||
{
|
||||
src_bw = t->m_TEX0.TBW;
|
||||
|
||||
rect.x /= 2;
|
||||
rect.z /= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
rect.y /= 2;
|
||||
rect.w /= 2;
|
||||
}
|
||||
}
|
||||
if (bp > t->m_TEX0.TBP0)
|
||||
{
|
||||
GSVector4i new_rect = r;
|
||||
GSVector4i new_rect = rect;
|
||||
if (linear)
|
||||
{
|
||||
new_rect.z -= 1;
|
||||
|
@ -1052,10 +1075,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
const bool can_translate = CanTranslate(bp, bw, psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW);
|
||||
if (can_translate)
|
||||
{
|
||||
const bool swizzle_match = GSLocalMemory::m_psm[psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
|
||||
const bool swizzle_match = GSLocalMemory::m_psm[src_psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth;
|
||||
const GSVector2i& page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs;
|
||||
const GSVector4i page_mask(GSVector4i((page_size.x - 1), (page_size.y - 1)).xyxy());
|
||||
GSVector4i rect = new_rect & ~page_mask;
|
||||
rect = new_rect & ~page_mask;
|
||||
|
||||
if (swizzle_match)
|
||||
{
|
||||
|
@ -1068,18 +1091,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
// If it's not page aligned, grab the whole pages it covers, to be safe.
|
||||
if (GSLocalMemory::m_psm[psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp)
|
||||
{
|
||||
const GSVector2i& dst_page_size = GSLocalMemory::m_psm[psm].pgs;
|
||||
rect = GSVector4i(rect.x / page_size.x, rect.y / page_size.y, (rect.z + (page_size.x - 1)) / page_size.x, (rect.w + (page_size.y - 1)) / page_size.y);
|
||||
rect = GSVector4i(rect.x * dst_page_size.x, rect.y * dst_page_size.y, rect.z * dst_page_size.x, rect.w * dst_page_size.y);
|
||||
const GSVector2i& dst_page_size = GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs;
|
||||
new_rect = GSVector4i(new_rect.x / page_size.x, new_rect.y / page_size.y, (new_rect.z + (page_size.x - 1)) / page_size.x, (new_rect.w + (page_size.y - 1)) / page_size.y);
|
||||
new_rect = GSVector4i(new_rect.x * dst_page_size.x, new_rect.y * dst_page_size.y, new_rect.z * dst_page_size.x, new_rect.w * dst_page_size.y);
|
||||
}
|
||||
else
|
||||
{
|
||||
rect.x &= ~(page_size.x - 1);
|
||||
rect.y &= ~(page_size.y - 1);
|
||||
rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
|
||||
rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
|
||||
new_rect.x &= ~(page_size.x - 1);
|
||||
new_rect.y &= ~(page_size.y - 1);
|
||||
new_rect.z = (new_rect.z + (page_size.x - 1)) & ~(page_size.x - 1);
|
||||
new_rect.w = (new_rect.w + (page_size.y - 1)) & ~(page_size.y - 1);
|
||||
}
|
||||
rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, bw, rect);
|
||||
rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, bw, new_rect);
|
||||
rect.x -= new_rect.x & ~(page_size.y - 1);
|
||||
rect.y -= new_rect.x & ~(page_size.y - 1);
|
||||
}
|
||||
|
@ -1107,11 +1130,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
dst = t;
|
||||
tex_merge_rt = false;
|
||||
found_t = true;
|
||||
continue;
|
||||
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
SurfaceOffset so = ComputeSurfaceOffset(bp, bw, psm, r, t);
|
||||
SurfaceOffset so = ComputeSurfaceOffset(bp, bw, psm, new_rect, t);
|
||||
if (!so.is_valid && t->Wraps())
|
||||
{
|
||||
// Improves Beyond Good & Evil shadow.
|
||||
|
@ -1127,14 +1153,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
tex_merge_rt = false;
|
||||
found_t = true;
|
||||
// Keep looking, just in case there is an exact match (Situation: Target frame drawn inside target frame, current makes a separate texture)
|
||||
continue;
|
||||
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (linear)
|
||||
{
|
||||
new_rect.z += 1;
|
||||
new_rect.w += 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1161,7 +1185,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
dst = t;
|
||||
tex_merge_rt = false;
|
||||
found_t = true;
|
||||
continue;
|
||||
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strictly speaking this path is no longer needed, but I'm leaving it here for now because Guitar
|
||||
|
@ -1175,7 +1202,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
|
||||
// Prefer a target inside over a target outside.
|
||||
found_t = false;
|
||||
continue;
|
||||
if (dst->m_TEX0.TBP0 == frame_fbp && possible_shuffle)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1208,11 +1238,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
GIFRegTEX0 depth_TEX0;
|
||||
depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
|
||||
depth_TEX0.U32[1] = TEX0.U32[1];
|
||||
return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r, possible_shuffle, linear);
|
||||
return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r, possible_shuffle, linear, frame_fbp);
|
||||
}
|
||||
else
|
||||
{
|
||||
return LookupDepthSource(TEX0, TEXA, CLAMP, r, possible_shuffle, linear, true);
|
||||
return LookupDepthSource(TEX0, TEXA, CLAMP, r, possible_shuffle, linear, frame_fbp, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1512,7 +1542,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
dst_match = t;
|
||||
}
|
||||
}
|
||||
|
||||
// We only want to use a matched target if it's actually being used.
|
||||
if (dst_match)
|
||||
{
|
||||
calcRescale(dst_match);
|
||||
|
@ -1537,6 +1567,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
dst->m_valid_alpha_high = dst_match->m_valid_alpha_high && psm_s.trbpp != 24;
|
||||
dst->m_valid_rgb = dst_match->m_valid_rgb;
|
||||
|
||||
if(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16)
|
||||
dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries).
|
||||
|
||||
ShaderConvert shader;
|
||||
// m_32_bits_fmt gets set on a shuffle or if the format isn't 16bit.
|
||||
// In this case it needs to make sure it isn't part of a shuffle, where it needs to be interpreted as 32bits.
|
||||
|
@ -1616,7 +1649,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
}
|
||||
|
||||
GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, float scale, int type,
|
||||
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect)
|
||||
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_target, const GSVector4i draw_rect, GSTextureCache::Source* src)
|
||||
{
|
||||
if (type == DepthStencil)
|
||||
{
|
||||
|
@ -1631,7 +1664,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
|
||||
Target* dst = Target::Create(TEX0, size.x, size.y, scale, type, true);
|
||||
|
||||
PreloadTarget(TEX0, size, valid_size, is_frame, preload, preserve_target, draw_rect, dst);;
|
||||
PreloadTarget(TEX0, size, valid_size, is_frame, preload, preserve_target, draw_rect, dst, src);
|
||||
|
||||
dst->m_is_frame = is_frame;
|
||||
|
||||
|
@ -1654,7 +1687,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(GIFRegTEX0 TEX0, const GSVe
|
|||
}
|
||||
|
||||
void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
|
||||
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst)
|
||||
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst, GSTextureCache::Source* src)
|
||||
{
|
||||
// In theory new textures contain invalidated data. Still in theory a new target
|
||||
// must contain the content of the GS memory.
|
||||
|
@ -1786,17 +1819,62 @@ void GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
|
|||
auto j = i;
|
||||
Target* t = *j;
|
||||
|
||||
// could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half.
|
||||
if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW &&
|
||||
t->m_TEX0.PSM == dst->m_TEX0.PSM &&
|
||||
((((t->m_end_block + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0)
|
||||
{
|
||||
//DevCon.Warning("Found one %x->%x BW %d PSM %x (new target %x->%x BW %d PSM %x)", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW, t->m_TEX0.PSM, dst->m_TEX0.TBP0, dst->m_end_block, dst->m_TEX0.TBW, dst->m_TEX0.PSM);
|
||||
GSVector4i new_valid = t->m_valid;
|
||||
new_valid.w /= 2;
|
||||
t->ResizeValidity(new_valid);
|
||||
return;
|
||||
}
|
||||
if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM)
|
||||
if(t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid))
|
||||
{
|
||||
// could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half.
|
||||
if (((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0) == dst->m_TEX0.TBP0)
|
||||
{
|
||||
//DevCon.Warning("Found one %x->%x BW %d PSM %x (new target %x->%x BW %d PSM %x)", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW, t->m_TEX0.PSM, dst->m_TEX0.TBP0, dst->m_end_block, dst->m_TEX0.TBW, dst->m_TEX0.PSM);
|
||||
GSVector4i new_valid = t->m_valid;
|
||||
new_valid.w /= 2;
|
||||
t->ResizeValidity(new_valid);
|
||||
return;
|
||||
}
|
||||
// The new texture is behind it but engulfs the whole thing, shrink the new target so it grows in the HW Draw resize.
|
||||
else if (((((dst->UnwrappedEndBlock() + 1) - dst->m_TEX0.TBP0) >> 1) + dst->m_TEX0.TBP0) == t->m_TEX0.TBP0)
|
||||
{
|
||||
if (dst->m_TEX0.TBW == 2)
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
int overlapping_pages = ((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5;
|
||||
int y_reduction = (overlapping_pages / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y;
|
||||
|
||||
if (y_reduction == 0 || (overlapping_pages % dst->m_TEX0.TBW))
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const int copy_width = (t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth();
|
||||
const int copy_height = y_reduction * t->m_scale;
|
||||
const int old_height = (dst->m_valid.w - y_reduction) * dst->m_scale;
|
||||
GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, old_height);
|
||||
|
||||
// Clear the dirty first
|
||||
dst->Update();
|
||||
// Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing.
|
||||
g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), 0, old_height);
|
||||
if (src && src->m_target && src->m_from_target == t)
|
||||
{
|
||||
// This should never happen as we're making a new target so the src should never be something it overlaps, but just in case.
|
||||
GSVector4i new_valid = t->m_valid;
|
||||
new_valid.y = std::max(new_valid.y - y_reduction, 0);
|
||||
new_valid.w = std::max(new_valid.w - y_reduction, 0);
|
||||
t->m_TEX0.TBP0 += (y_reduction / GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) << 5;
|
||||
t->ResizeValidity(new_valid);
|
||||
}
|
||||
else
|
||||
{
|
||||
InvalidateSourcesFromTarget(t);
|
||||
i = list.erase(j);
|
||||
delete t;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
@ -3702,6 +3780,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0));
|
||||
}
|
||||
}
|
||||
src->m_32_bits_fmt = dst->m_32_bits_fmt;
|
||||
|
||||
if (psm.pal > 0)
|
||||
{
|
||||
|
@ -3762,6 +3841,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
src->m_alpha_minmax.second = (using_both ? std::max(TEXA.TA1, TEXA.TA0) : (using_ta1 ? TEXA.TA1 : TEXA.TA0));
|
||||
}
|
||||
}
|
||||
src->m_32_bits_fmt = dst->m_32_bits_fmt;
|
||||
|
||||
dst->Update();
|
||||
|
||||
|
|
|
@ -424,7 +424,7 @@ protected:
|
|||
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
|
||||
|
||||
void PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size, bool is_frame,
|
||||
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst);
|
||||
bool preload, bool preserve_target, const GSVector4i draw_rect, Target* dst, GSTextureCache::Source* src = nullptr);
|
||||
|
||||
// Returns scaled texture size.
|
||||
static GSVector2i ScaleRenderTargetSize(const GSVector2i& sz, float scale);
|
||||
|
@ -473,8 +473,8 @@ public:
|
|||
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size);
|
||||
std::shared_ptr<Palette> LookupPaletteObject(const u32* clut, u16 pal, bool need_gs_texture);
|
||||
|
||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear);
|
||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, bool palette = false);
|
||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod, const bool possible_shuffle, const bool linear, const u32 frame_fbp = 0xFFFFFFFF);
|
||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const bool possible_shuffle, const bool linear, const u32 frame_fbp = 0xFFFFFFFF, bool palette = false);
|
||||
|
||||
Target* FindTargetOverlap(Target* target, int type, int psm);
|
||||
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
|
||||
|
@ -482,7 +482,7 @@ public:
|
|||
const GSVector4i draw_rc = GSVector4i::zero());
|
||||
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0,
|
||||
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
|
||||
const GSVector4i draw_rc = GSVector4i::zero());
|
||||
const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr);
|
||||
Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale);
|
||||
|
||||
/// Looks up a target in the cache, and only returns it if the BP/BW match exactly.
|
||||
|
|
|
@ -1803,6 +1803,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
|
|||
setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT);
|
||||
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
|
||||
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
|
||||
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
|
||||
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
|
||||
|
|
|
@ -182,6 +182,7 @@ enum GSMTLFnConstants
|
|||
GSMTLConstantIndex_PS_ADJT,
|
||||
GSMTLConstantIndex_PS_LTF,
|
||||
GSMTLConstantIndex_PS_SHUFFLE,
|
||||
GSMTLConstantIndex_PS_SHUFFLE_SAME,
|
||||
GSMTLConstantIndex_PS_READ_BA,
|
||||
GSMTLConstantIndex_PS_READ16_SRC,
|
||||
GSMTLConstantIndex_PS_WRITE_RG,
|
||||
|
|
|
@ -41,6 +41,7 @@ constant bool PS_ADJS [[function_constant(GSMTLConstantIndex_PS_AD
|
|||
constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_ADJT)]];
|
||||
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
|
||||
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
|
||||
constant bool PS_SHUFFLE_SAME [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_SAME)]];
|
||||
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
|
||||
constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]];
|
||||
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
|
||||
|
@ -1021,21 +1022,37 @@ struct PSMain
|
|||
uint4 denorm_c = uint4(C);
|
||||
uint2 denorm_TA = uint2(cb.ta * 255.5f);
|
||||
|
||||
if (PS_READ16_SRC)
|
||||
if (PS_SHUFFLE_SAME)
|
||||
{
|
||||
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
|
||||
if (denorm_c.a & 0x80)
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
|
||||
if (PS_READ_BA)
|
||||
{
|
||||
C.ga = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
|
||||
C.rb = C.ga;
|
||||
}
|
||||
else
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
|
||||
{
|
||||
C.ga = C.rg;
|
||||
C.rb = C.ga;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
C.rb = PS_READ_BA ? C.bb : C.rr;
|
||||
if (PS_READ_BA)
|
||||
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
if (PS_READ16_SRC)
|
||||
{
|
||||
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
|
||||
if (denorm_c.a & 0x80)
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
else
|
||||
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
{
|
||||
C.rb = PS_READ_BA ? C.bb : C.rr;
|
||||
if (PS_READ_BA)
|
||||
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
else
|
||||
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1378,6 +1378,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||
+ fmt::format("#define PS_BLEND_D {}\n", sel.blend_d)
|
||||
+ fmt::format("#define PS_IIP {}\n", sel.iip)
|
||||
+ fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle)
|
||||
+ fmt::format("#define PS_SHUFFLE_SAME {}\n", sel.shuffle_same)
|
||||
+ fmt::format("#define PS_READ_BA {}\n", sel.read_ba)
|
||||
+ fmt::format("#define PS_READ16_SRC {}\n", sel.real16src)
|
||||
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)
|
||||
|
|
|
@ -4669,6 +4669,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
|||
AddMacro(ss, "PS_FIXED_ONE_A", sel.fixed_one_a);
|
||||
AddMacro(ss, "PS_IIP", sel.iip);
|
||||
AddMacro(ss, "PS_SHUFFLE", sel.shuffle);
|
||||
AddMacro(ss, "PS_SHUFFLE_SAME", sel.shuffle_same);
|
||||
AddMacro(ss, "PS_READ_BA", sel.read_ba);
|
||||
AddMacro(ss, "PS_READ16_SRC", sel.real16src);
|
||||
AddMacro(ss, "PS_WRITE_RG", sel.write_rg);
|
||||
|
|
Loading…
Reference in New Issue