This commit is contained in:
refractionpcsx2 2024-12-30 19:35:44 +00:00 committed by GitHub
commit 574f0bc664
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 517 additions and 183 deletions

View File

@ -945,7 +945,7 @@ vec4 ps_color()
vec4 T = sample_color(st); vec4 T = sample_color(st);
#endif #endif
#if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME #if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T); uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8u); T.r = float((denorm_c_before.b << 3) & 0xF8u);
@ -1320,7 +1320,7 @@ void main()
ps_blend(C, alpha_blend); ps_blend(C, alpha_blend);
#if PS_SHUFFLE #if PS_SHUFFLE
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME #if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C); uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u));

View File

@ -1674,7 +1674,8 @@ void GSState::FlushPrim()
Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM); Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM);
} }
#endif #endif
// Update scissor, it may have been modified by a previous draw
m_env.CTXT[PRIM->CTXT].UpdateScissor();
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM)); m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// Texel coordinate rounding // Texel coordinate rounding

View File

@ -224,6 +224,8 @@ public:
bool m_texflush_flag = false; bool m_texflush_flag = false;
bool m_isPackedUV_HackFlag = false; bool m_isPackedUV_HackFlag = false;
bool m_channel_shuffle = false; bool m_channel_shuffle = false;
bool m_in_target_draw = false;
u32 m_target_offset = 0;
u8 m_scanmask_used = 0; u8 m_scanmask_used = 0;
u32 m_dirty_gs_regs = 0; u32 m_dirty_gs_regs = 0;
int m_backed_up_ctx = 0; int m_backed_up_ctx = 0;

View File

@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
tex_pos &= 0xFF; tex_pos &= 0xFF;
shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8;
const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8;
process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0;
process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0;
// "same group" means it can read blue and write alpha using C32 tricks // "same group" means it can read blue and write alpha using C32 tricks
@ -489,7 +489,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
// Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width // Dogs will reuse the Z in a different size format for a completely unrelated draw with an FBW of 2, then go back to using it in full width
const bool size_is_wrong = tex->m_target ? (static_cast<int>(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false; const bool size_is_wrong = tex->m_target ? (static_cast<int>(tex->m_from_target_TEX0.TBW * 64) < tex->m_from_target->m_valid.z / 2) : false;
const u32 draw_page_width = std::max(static_cast<int>(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 8.9f : 0.9f)) / 64, 1); const u32 draw_page_width = std::max(static_cast<int>(m_vt.m_max.p.x + (!(process_ba & SHUFFLE_WRITE) ? 8.9f : 0.9f)) / 64, 1);
const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z); const bool single_direction_doubled = (m_vt.m_max.p.y > rt->m_valid.w) != (m_vt.m_max.p.x > rt->m_valid.z) || (IsSinglePageDraw() && m_r.height() > 32);
if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled))) if (size_is_wrong || (rt && ((rt->m_TEX0.TBW % draw_page_width) == 0 || single_direction_doubled)))
{ {
@ -554,7 +554,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
} }
else else
{ {
if ((floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW))) if (((m_r.width() + 8) & ~(GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x - 1)) != GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs.x && (floor(m_vt.m_max.p.y) <= rt->m_valid.w) && ((floor(m_vt.m_max.p.x) > (m_cached_ctx.FRAME.FBW * 64)) || (rt->m_TEX0.TBW != m_cached_ctx.FRAME.FBW)))
{ {
half_bottom_vert = false; half_bottom_vert = false;
half_bottom_uv = false; half_bottom_uv = false;
@ -587,6 +587,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
else else
v[i + 1 - reversed_U].U += 128u; v[i + 1 - reversed_U].U += 128u;
} }
else
{
if (((pos + 8) >> 4) & 0x8)
{
v[i + reversed_pos].XYZ.X -= 128u;
v[i + 1 - reversed_pos].XYZ.X -= 128u;
}
}
if (half_bottom_vert) if (half_bottom_vert)
{ {
@ -704,6 +712,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
m_vt.m_max.t.x += 8.0f; m_vt.m_max.t.x += 8.0f;
} }
} }
else
{
if (fmod(std::floor(m_vt.m_min.p.x), 64.0f) == 8.0f)
{
m_vt.m_min.p.x -= 8.0f;
m_vt.m_max.p.x -= 8.0f;
}
}
if (half_right_vert) if (half_right_vert)
{ {
@ -897,7 +913,7 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex)
} }
// If it's a channel shuffle, it'll likely be just a single page, so assume full screen. // If it's a channel shuffle, it'll likely be just a single page, so assume full screen.
if (m_channel_shuffle) if (m_channel_shuffle || (tex && IsPageCopy()))
{ {
const int page_x = frame_psm.pgs.x - 1; const int page_x = frame_psm.pgs.x - 1;
const int page_y = frame_psm.pgs.y - 1; const int page_y = frame_psm.pgs.y - 1;
@ -1008,6 +1024,25 @@ bool GSRendererHW::IsPossibleChannelShuffle() const
return false; return false;
} }
bool GSRendererHW::IsPageCopy() const
{
if (!PRIM->TME)
return false;
const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx];
if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20))
return false;
if (next_ctx.FRAME.FBP != (m_cached_ctx.FRAME.FBP + 0x1))
return false;
if (!NextDrawMatchesShuffle())
return false;
return true;
}
bool GSRendererHW::NextDrawMatchesShuffle() const bool GSRendererHW::NextDrawMatchesShuffle() const
{ {
// Make sure nothing unexpected has changed. // Make sure nothing unexpected has changed.
@ -1165,6 +1200,16 @@ GSVector4i GSRendererHW::GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages)
return GSVector4i::loadh(size); return GSVector4i::loadh(size);
} }
bool GSRendererHW::IsSinglePageDraw() const
{
const GSVector2i& frame_pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs;
if (m_r.width() <= frame_pgs.x && m_r.height() <= frame_pgs.y)
return true;
return false;
}
bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw) bool GSRendererHW::TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw)
{ {
const u32 start_bp = FRAME.Block(); const u32 start_bp = FRAME.Block();
@ -1575,7 +1620,11 @@ void GSRendererHW::Move()
const int w = m_env.TRXREG.RRW; const int w = m_env.TRXREG.RRW;
const int h = m_env.TRXREG.RRH; const int h = m_env.TRXREG.RRH;
GL_CACHE("Starting Move! 0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d) draw %d",
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM),
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM),
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
sx, sy, dx, dy, w, h, s_n);
if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy,
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h))
{ {
@ -2026,6 +2075,8 @@ void GSRendererHW::Draw()
if (num_skipped_channel_shuffle_draws > 0) if (num_skipped_channel_shuffle_draws > 0)
GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws);
num_skipped_channel_shuffle_draws = 0; num_skipped_channel_shuffle_draws = 0;
m_last_channel_shuffle_fbp = 0xffff;
m_last_channel_shuffle_end_block = 0xffff;
#else #else
if (m_channel_shuffle) if (m_channel_shuffle)
return; return;
@ -2527,7 +2578,7 @@ void GSRendererHW::Draw()
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false, GSTextureCache::Target* tgt = g_texture_cache->LookupTarget(FRAME_TEX0, GSVector2i(m_vt.m_max.p.x, m_vt.m_max.p.y), GetTextureScaleFactor(), GSTextureCache::RenderTarget, false,
fm); fm, false, false, false, false, GSVector4i::zero(), true);
if (tgt) if (tgt)
shuffle_target = tgt->m_32_bits_fmt; shuffle_target = tgt->m_32_bits_fmt;
@ -2613,14 +2664,11 @@ void GSRendererHW::Draw()
} }
// Estimate size based on the scissor rectangle and height cache. // Estimate size based on the scissor rectangle and height cache.
const GSVector2i t_size = GetTargetSize(src); GSVector2i t_size = GetTargetSize(src);
const GSVector4i t_size_rect = GSVector4i::loadh(t_size); const GSVector4i t_size_rect = GSVector4i::loadh(t_size);
// Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area.
const GSVector4i unclamped_draw_rect = m_r; const GSVector4i unclamped_draw_rect = m_r;
// Don't clamp on shuffle, the height cache may troll us with the REAL height.
if (!m_texture_shuffle && m_split_texture_shuffle_pages == 0)
m_r = m_r.rintersect(t_size_rect);
float target_scale = GetTextureScaleFactor(); float target_scale = GetTextureScaleFactor();
int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound);
@ -2677,81 +2725,10 @@ void GSRendererHW::Draw()
GSTextureCache::Target* rt = nullptr; GSTextureCache::Target* rt = nullptr;
GIFRegTEX0 FRAME_TEX0; GIFRegTEX0 FRAME_TEX0;
if (!no_rt) const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM];
{
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
// Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead m_in_target_draw = false;
// (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to m_target_offset = 0;
// create that target, because the clear isn't black, it'll hang around and never get invalidated.
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover;
const bool is_clear = is_possible_mem_clear && is_square;
const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) &&
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle());
// Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size.
// Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed.
const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw));
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear);
// Draw skipped because it was a clear and there was no target.
if (!rt)
{
if (is_clear)
{
GL_INS("Clear draw with no target, skipping.");
const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color);
const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth);
const u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const u32 ds_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r);
TryGSMemClear(no_rt, preserve_rt_color, is_zero_color_clear, rt_end_bp,
no_ds, preserve_depth, is_zero_depth_clear, ds_end_bp);
CleanupDraw(true);
return;
}
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_color | possible_shuffle, m_r, src);
if (!rt) [[unlikely]]
{
GL_INS("ERROR: Failed to create FRAME target, skipping.");
CleanupDraw(true);
return;
}
}
if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt)
{
src->m_texture = rt->m_texture;
src->m_scale = rt->GetScale();
src->m_unscaled_size = rt->m_unscaled_size;
}
target_scale = rt->GetScale();
// The target might have previously been a C32 format with valid alpha. If we're switching to C24, we need to preserve it.
preserve_rt_alpha |= (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == 24 && rt->HasValidAlpha());
preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
if (m_channel_shuffle)
{
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
// If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following.
m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block);
}
}
GSTextureCache::Target* ds = nullptr; GSTextureCache::Target* ds = nullptr;
GIFRegTEX0 ZBUF_TEX0; GIFRegTEX0 ZBUF_TEX0;
@ -2759,23 +2736,25 @@ void GSRendererHW::Draw()
{ {
ZBUF_TEX0.U64 = 0; ZBUF_TEX0.U64 = 0;
ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block();
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW;
ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM;
ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil,
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false,
src, -1);
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
if (!ds) if (!ds)
{ {
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src);
true, 0, false, force_preload, preserve_depth, m_r, src); if (!ds) [[unlikely]]
if (!ds) [[unlikely]] {
{ GL_INS("ERROR: Failed to create ZBUF target, skipping.");
GL_INS("ERROR: Failed to create ZBUF target, skipping."); CleanupDraw(true);
CleanupDraw(true); return;
return; }
}
} }
else else
{ {
@ -2785,7 +2764,7 @@ void GSRendererHW::Draw()
if (ds->m_alpha_max != 0) if (ds->m_alpha_max != 0)
{ {
const u32 max_z = (static_cast<u64>(ds->m_alpha_max + 1) << 24) - 1; const u32 max_z = (static_cast<u64>(ds->m_alpha_max + 1) << 24) - 1;
switch (m_cached_ctx.TEST.ZTST) switch (m_cached_ctx.TEST.ZTST)
{ {
case ZTST_GEQUAL: case ZTST_GEQUAL:
@ -2820,6 +2799,138 @@ void GSRendererHW::Draw()
} }
} }
if (!no_rt)
{
const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) &&
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle());
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
// Don't clamp on shuffle, the height cache may troll us with the REAL height.
if (!possible_shuffle && m_split_texture_shuffle_pages == 0)
m_r = m_r.rintersect(t_size_rect);
// Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead
// (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to
// create that target, because the clear isn't black, it'll hang around and never get invalidated.
const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_primitive_covers_without_gaps == NoGapsType::FullCover;
const bool is_clear = is_possible_mem_clear && is_square;
// Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size.
// Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed.
const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw));
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(),
GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (ds->m_TEX0.TBP0 - m_cached_ctx.ZBUF.Block()));
// Draw skipped because it was a clear and there was no target.
if (!rt)
{
if (is_clear)
{
GL_INS("Clear draw with no target, skipping.");
const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color);
const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth);
const u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const u32 ds_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r);
TryGSMemClear(no_rt, preserve_rt_color, is_zero_color_clear, rt_end_bp,
no_ds, preserve_depth, is_zero_depth_clear, ds_end_bp);
CleanupDraw(true);
return;
}
else if (IsPageCopy() && src->m_from_target && m_cached_ctx.TEX0.TBP0 >= src->m_from_target->m_TEX0.TBP0)
{
FRAME_TEX0.TBW = src->m_from_target->m_TEX0.TBW;
}
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_color | possible_shuffle, m_r, src);
if (!rt) [[unlikely]]
{
GL_INS("ERROR: Failed to create FRAME target, skipping.");
CleanupDraw(true);
return;
}
}
else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt
{
GSVertex* v = &m_vertex.buff[0];
int vertical_offset = ((std::abs(static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it..
const int horizontal_offset = (std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x;
// Used to reduce the offset made later in channel shuffles
m_target_offset = std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5);
for (u32 i = 0; i < m_vertex.tail; i++)
{
v[i].XYZ.Y += vertical_offset << 4;
v[i].XYZ.X += horizontal_offset << 4;
}
m_context->scissor.in.x += horizontal_offset;
m_context->scissor.in.z += horizontal_offset;
m_context->scissor.in.y += vertical_offset;
m_context->scissor.in.w += vertical_offset;
m_r.y += vertical_offset;
m_r.w += vertical_offset;
m_r.x += horizontal_offset;
m_r.z += horizontal_offset;
m_in_target_draw = true;
m_vt.m_min.p.x += horizontal_offset;
m_vt.m_max.p.x += horizontal_offset;
m_vt.m_min.p.y += vertical_offset;
m_vt.m_max.p.y += vertical_offset;
t_size.x = rt->m_unscaled_size.x - horizontal_offset;
t_size.y = rt->m_unscaled_size.y - vertical_offset;
if (t_size.y <= 0)
{
u32 new_height = m_r.w;
if (possible_shuffle && std::abs(static_cast<s16>(GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16)
new_height /= 2;
//DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n);
rt->ResizeTexture(rt->m_unscaled_size.x, new_height);
rt->UpdateValidity(m_r, true);
rt->UpdateDrawn(m_r, true);
}
}
if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt)
{
src->m_texture = rt->m_texture;
src->m_scale = rt->GetScale();
src->m_unscaled_size = rt->m_unscaled_size;
}
target_scale = rt->GetScale();
// The target might have previously been a C32 format with valid alpha. If we're switching to C24, we need to preserve it.
preserve_rt_alpha |= (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == 24 && rt->HasValidAlpha());
preserve_rt_color = preserve_rt_rgb || preserve_rt_alpha;
if (m_channel_shuffle)
{
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
// If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following.
m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block);
}
else
m_last_channel_shuffle_end_block = 0xFFFF;
}
if (m_process_texture) if (m_process_texture)
{ {
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
@ -3044,7 +3155,7 @@ void GSRendererHW::Draw()
} }
} }
const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque(); const bool blending_cd = PRIM->ABE && !m_context->ALPHA.IsOpaque();
if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM)) if (rt && ((!is_possible_mem_clear || blending_cd) || rt->m_TEX0.PSM != FRAME_TEX0.PSM) && !m_in_target_draw)
{ {
if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000)) if (rt->m_TEX0.TBW != FRAME_TEX0.TBW && !m_cached_ctx.ZBUF.ZMSK && (m_cached_ctx.FRAME.FBMSK & 0xFF000000))
{ {
@ -3055,11 +3166,18 @@ void GSRendererHW::Draw()
if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) if (m_cached_ctx.FRAME.FBMSK & 0xF0000000)
rt->m_valid_alpha_high = false; rt->m_valid_alpha_high = false;
} }
rt->m_TEX0 = FRAME_TEX0; if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y))
{
rt->m_TEX0 = FRAME_TEX0;
}
} }
if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW))) if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw)
ds->m_TEX0 = ZBUF_TEX0; {
if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y))
ds->m_TEX0 = ZBUF_TEX0;
}
} }
else if (!m_texture_shuffle) else if (!m_texture_shuffle)
{ {
@ -3067,7 +3185,7 @@ void GSRendererHW::Draw()
// The FBW should also be okay, since it's coming from the source. // The FBW should also be okay, since it's coming from the source.
if (rt) if (rt)
{ {
const bool update_fbw = (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack());
rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW);
rt->m_TEX0.PSM = FRAME_TEX0.PSM; rt->m_TEX0.PSM = FRAME_TEX0.PSM;
} }
@ -3089,7 +3207,7 @@ void GSRendererHW::Draw()
GSTextureCache::Target* old_ds = nullptr; GSTextureCache::Target* old_ds = nullptr;
// If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks. // If the draw is dated, we're going to expand in to black, so it's just a pointless rescale which will mess up our valid rects and end blocks.
if(!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM)) if (!(m_cached_ctx.TEST.DATE && m_cached_ctx.TEST.DATM))
{ {
GSVector2i new_size = t_size; GSVector2i new_size = t_size;
@ -3137,7 +3255,7 @@ void GSRendererHW::Draw()
rt->ResizeDrawn(rt->GetUnscaledRect()); rt->ResizeDrawn(rt->GetUnscaledRect());
} }
const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(new_size)); const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h)));
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle));
@ -3429,6 +3547,8 @@ void GSRendererHW::Draw()
if (rt) if (rt)
rt->m_last_draw = s_n; rt->m_last_draw = s_n;
if (ds)
ds->m_last_draw = s_n;
#ifdef DISABLE_HW_TEXTURE_CACHE #ifdef DISABLE_HW_TEXTURE_CACHE
if (rt) if (rt)
g_texture_cache->Read(rt, real_rect); g_texture_cache->Read(rt, real_rect);
@ -3883,7 +4003,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
m_conf.ps.urban_chaos_hle = 1; m_conf.ps.urban_chaos_hle = 1;
} }
} }
else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3)
{ {
// Blood will tell. I think it is channel effect too but again // Blood will tell. I think it is channel effect too but again
// implemented in a different way. I don't want to add more CRC stuff. So // implemented in a different way. I don't want to add more CRC stuff. So
@ -3991,8 +4111,8 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
min_uv.x -= block_offset.x * t_psm.bs.x; min_uv.x -= block_offset.x * t_psm.bs.x;
min_uv.y -= block_offset.y * t_psm.bs.y; min_uv.y -= block_offset.y * t_psm.bs.y;
if (GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) && //if (/*GSLocalMemory::IsPageAligned(src->m_TEX0.PSM, m_r) &&*/
block_offset.eq(m_r_block_offset)) // block_offset.eq(m_r_block_offset))
{ {
if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0))) if (min_uv.eq(GSVector4i::cxpr(0, 0, 0, 0)))
channel = ChannelFetch_RED; channel = ChannelFetch_RED;
@ -4040,13 +4160,44 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
// Performance GPU note: it could be wise to reduce the size to // Performance GPU note: it could be wise to reduce the size to
// the rendered size of the framebuffer // the rendered size of the framebuffer
GSVertex* s = &m_vertex.buff[0]; if (!m_in_target_draw && (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || NextDrawMatchesShuffle()))
s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0); {
s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 16384); GSVertex* s = &m_vertex.buff[0];
s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 0); s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0);
s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 16384); s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 16384);
s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 0);
s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + 16384);
m_r = GSVector4i(0, 0, 1024, 1024); s[0].U = 0;
s[1].U = 16384;
s[0].V = 0;
s[1].V = 16384;
m_r = GSVector4i(0, 0, 1024, 1024);
}
else
{
const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
const u32 frame_page_offset = std::max(static_cast<int>(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0);
m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1));
m_cached_ctx.FRAME.FBP += frame_page_offset;
m_in_target_draw |= frame_page_offset > 0;
GSVertex* s = &m_vertex.buff[0];
s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.x << 4));
s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.z << 4));
s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.y << 4));
s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.w << 4));
const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y);
m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5;
s[0].U = m_r.x << 4;
s[1].U = m_r.z << 4;
s[0].V = m_r.y << 4;
s[1].V = m_r.w << 4;
m_last_channel_shuffle_fbmsk = 0xFFFFFFFF;
}
m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2; m_index.tail = 2;
@ -5194,9 +5345,12 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region,
bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy) bool& target_region, GSVector2i& unscaled_size, float& scale, GSDevice::RecycledTexture& src_copy)
{ {
const int tex_diff = tex->m_from_target ? static_cast<int>(m_cached_ctx.TEX0.TBP0 - tex->m_from_target->m_TEX0.TBP0) : 0;
const int frame_diff = rt ? static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0;
// Detect framebuffer read that will need special handling // Detect framebuffer read that will need special handling
const GSTextureCache::Target* src_target = nullptr; const GSTextureCache::Target* src_target = nullptr;
if (m_conf.tex == m_conf.rt) if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region)))
{ {
// Can we read the framebuffer directly? (i.e. sample location matches up). // Can we read the framebuffer directly? (i.e. sample location matches up).
if (CanUseTexIsFB(rt, tex, tmm)) if (CanUseTexIsFB(rt, tex, tmm))
@ -5236,6 +5390,10 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); GL_CACHE("Source is depth buffer, unsafe to read, taking copy.");
src_target = ds; src_target = ds;
} }
else if (m_channel_shuffle && tex->m_from_target && tex_diff != frame_diff)
{
src_target = tex->m_from_target;
}
else if (!m_downscale_source) else if (!m_downscale_source)
{ {
// No match. // No match.
@ -5250,7 +5408,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
GSVector4i copy_range; GSVector4i copy_range;
GSVector2i copy_size; GSVector2i copy_size;
GSVector2i copy_dst_offset; GSVector2i copy_dst_offset;
bool copied_rt = false;
// Shuffles take the whole target. This should've already been halved. // Shuffles take the whole target. This should've already been halved.
// We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above.
// Restricting it also breaks Tom and Jerry... // Restricting it also breaks Tom and Jerry...
@ -5258,7 +5416,37 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
{ {
copy_range = src_bounds; copy_range = src_bounds;
copy_size = src_unscaled_size; copy_size = src_unscaled_size;
GSVector4i::storel(&copy_dst_offset, copy_range); GSVector4i::storel(&copy_dst_offset, copy_range);
if (m_channel_shuffle && (tex_diff || frame_diff))
{
u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5;
u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y;
u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x;
copy_range.y += vertical_offset;
copy_range.x += horizontal_offset;
copy_size.y -= vertical_offset;
copy_size.x -= horizontal_offset;
target_region = false;
source_region.bits = 0;
//copied_rt = tex->m_from_target != nullptr;
if (m_in_target_draw)
{
copy_size.x = m_r.width();
copy_size.y = m_r.height();
copy_range.w = copy_range.y + copy_size.y;
copy_range.z = copy_range.x + copy_size.x;
if (tex_diff != frame_diff)
{
GSVector4i::storel(&copy_dst_offset, m_r);
copy_size.x += copy_dst_offset.x;
copy_size.y += copy_dst_offset.y;
}
}
}
} }
else else
{ {
@ -5268,7 +5456,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
copy_size.y = std::min(tex_size.y, src_unscaled_size.y); copy_size.y = std::min(tex_size.y, src_unscaled_size.y);
// Use the texture min/max to get the copy range if not reinterpreted. // Use the texture min/max to get the copy range if not reinterpreted.
if (m_texture_shuffle) if (m_texture_shuffle || m_channel_shuffle)
copy_range = GSVector4i::loadh(copy_size); copy_range = GSVector4i::loadh(copy_size);
else else
copy_range = tmm.coverage; copy_range = tmm.coverage;
@ -5339,12 +5527,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale))); static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale)));
src_copy.reset(src_target->m_texture->IsDepthStencil() ? src_copy.reset(src_target->m_texture->IsDepthStencil() ?
g_gs_device->CreateDepthStencil( g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) :
scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : (m_downscale_source || copied_rt) ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) :
(m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true));
true) :
g_gs_device->CreateTexture(
scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)));
if (!src_copy) [[unlikely]] if (!src_copy) [[unlikely]]
{ {
Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y);
@ -5352,6 +5537,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
m_conf.ps.tfx = 4; m_conf.ps.tfx = 4;
return; return;
} }
if (m_downscale_source) if (m_downscale_source)
{ {
g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_perfmon.Put(GSPerfMon::TextureCopies, 1);
@ -7190,7 +7376,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
{ {
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
{ {
GL_PUSH("OI_BlitFMV"); GL_PUSH("OI_BlitFMV");
@ -7244,7 +7430,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc
g_texture_cache->InvalidateVideoMemSubTarget(_rt); g_texture_cache->InvalidateVideoMemSubTarget(_rt);
return false; // skip current draw return false; // skip current draw
} }*/
// Nothing to see keep going // Nothing to see keep going
return true; return true;

View File

@ -113,12 +113,14 @@ private:
void SetTCOffset(); void SetTCOffset();
bool IsPossibleChannelShuffle() const; bool IsPossibleChannelShuffle() const;
bool IsPageCopy() const;
bool NextDrawMatchesShuffle() const; bool NextDrawMatchesShuffle() const;
bool IsSplitTextureShuffle(GSTextureCache::Target* rt); bool IsSplitTextureShuffle(GSTextureCache::Target* rt);
GSVector4i GetSplitTextureShuffleDrawRect() const; GSVector4i GetSplitTextureShuffleDrawRect() const;
u32 GetEffectiveTextureShuffleFbmsk() const; u32 GetEffectiveTextureShuffleFbmsk() const;
static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages); static GSVector4i GetDrawRectForPages(u32 bw, u32 psm, u32 num_pages);
bool IsSinglePageDraw() const;
bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw); bool TryToResolveSinglePageFramebuffer(GIFRegFRAME& FRAME, bool only_next_draw);
bool IsSplitClearActive() const; bool IsSplitClearActive() const;

View File

@ -18,6 +18,7 @@
#include "fmt/format.h" #include "fmt/format.h"
#include <cinttypes> #include <cinttypes>
#include <math.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <stdlib.h> #include <stdlib.h>
@ -945,7 +946,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c
t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y); t->ResizeTexture(t->m_unscaled_size.x, t->m_unscaled_size.y);
t->m_valid = dst->m_valid; t->m_valid = dst->m_valid;
} }
CopyRGBFromDepthToColor(t, dst); CopyRGBFromDepthToColor(t, dst);
} }
@ -1091,9 +1092,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
req_rect.y = region.HasY() ? region.GetMinY() : 0; req_rect.y = region.HasY() ? region.GetMinY() : 0;
GSVector4i block_boundary_rect = req_rect; GSVector4i block_boundary_rect = req_rect;
block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1);
block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1);
// Round up to the nearest block boundary for lookup to avoid problems due to bilinear and inclusive rects. // Round up to the nearest block boundary for lookup to avoid problems due to bilinear and inclusive rects.
block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1));
block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1));
// Arc the Lad finds the wrong surface here when looking for a depth stencil. // Arc the Lad finds the wrong surface here when looking for a depth stencil.
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
@ -1115,8 +1118,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1)))
continue; continue;
//const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect);
const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect);
// Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha.
// Also, if we have already found a target which we had to offset into by using a region or exact address, // Also, if we have already found a target which we had to offset into by using a region or exact address,
// it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind)
@ -1523,7 +1526,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
rect.y -= new_rect.y & ~(page_size.y - 1); rect.y -= new_rect.y & ~(page_size.y - 1);
} }
rect = rect.rintersect(t->m_valid); //rect = rect.rintersect(t->m_valid);
if (rect.rempty()) if (rect.rempty())
continue; continue;
@ -1644,12 +1647,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport)
{ {
GSVector4i new_rect = req_rect;
// Just in case the TextureMinMax trolls us as it does, when checking if inside the target.
new_rect.z -= 2;
new_rect.w -= 2;
// Let's try a trick to avoid wrongly using a depth buffer // Let's try a trick to avoid wrongly using a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase // Unfortunately, I don't have any Arc the Lad testcase
// //
@ -1658,7 +1655,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
{ {
for (auto t : m_dst[DepthStencil]) for (auto t : m_dst[DepthStencil])
{ {
if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, new_rect)) if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && t->Inside(bp, bw, psm, block_boundary_rect))
{ {
GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled"); GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled");
// Let's fetch a depth format texture. Rationale: it will avoid the texture allocation and the // Let's fetch a depth format texture. Rationale: it will avoid the texture allocation and the
@ -1668,7 +1665,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
GIFRegTEX0 depth_TEX0; GIFRegTEX0 depth_TEX0;
depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
depth_TEX0.U32[1] = TEX0.U32[1]; depth_TEX0.U32[1] = TEX0.U32[1];
src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha);
if (src != nullptr) if (src != nullptr)
{ {
@ -1690,7 +1687,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
} }
else else
{ {
src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); src = LookupDepthSource(false, TEX0, TEXA, CLAMP, block_boundary_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true);
if (src != nullptr) if (src != nullptr)
{ {
@ -1803,7 +1800,8 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca
} }
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, bool preserve_scale) bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect,
bool is_shuffle, bool possible_clear, bool preserve_scale, GSTextureCache::Source* src, int offset)
{ {
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const u32 bp = TEX0.TBP0; const u32 bp = TEX0.TBP0;
@ -1812,8 +1810,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
const GSVector4 sRect(0, 0, 1, 1); const GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect{}; GSVector4 dRect{};
bool clear = true; bool clear = true;
const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) const auto& calcRescale = [&size, &scale, &new_size, &new_scaled_size, &clear, &dRect](const Target* tgt) {
{
// TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one. // TODO Possible optimization: rescale only the validity rectangle of the old target texture into the new one.
clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y); clear = (size.x > tgt->m_unscaled_size.x || size.y > tgt->m_unscaled_size.y);
new_size = size.max(tgt->m_unscaled_size); new_size = size.max(tgt->m_unscaled_size);
@ -1826,7 +1823,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
Target* dst = nullptr; Target* dst = nullptr;
auto& list = m_dst[type]; auto& list = m_dst[type];
const GSVector4i min_rect = draw_rect.max_u32(GSVector4i(0, 0, draw_rect.x, draw_rect.y));
// TODO: Move all frame stuff to its own routine too. // TODO: Move all frame stuff to its own routine too.
if (!is_frame) if (!is_frame)
{ {
@ -1837,6 +1834,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
if (bp == t->m_TEX0.TBP0) if (bp == t->m_TEX0.TBP0)
{ {
bool can_use = true; bool can_use = true;
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
{
DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0);
continue;
}
// if It's an old target and it's being completely overwritten, kill it. // if It's an old target and it's being completely overwritten, kill it.
// Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But,
// it does dirty it by writing over the 64x64 region. So while we can't use this heuristic for tossing // it does dirty it by writing over the 64x64 region. So while we can't use this heuristic for tossing
@ -1880,9 +1884,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst = t; dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16); dst->m_32_bits_fmt |= (psm_s.bpp != 16);
break;
/*if (FindOverlappingTarget(dst))
continue;
else*/
break;
} }
else else if(!(src && src->m_from_target == t))
{ {
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t); InvalidateSourcesFromTarget(t);
@ -1890,6 +1898,39 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
delete t; delete t;
} }
} }
// Probably pointing to half way through the target
else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets)
{
if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset)
continue;
const u32 widthpage_offset = (std::abs(static_cast<int>(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U);
const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast<u32>(min_rect.width()) <= (widthpage_offset * 64)));
if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect))
{
// If it's too old, it's probably not a real target to jump in to anymore.
if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle &&
!(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 ||
(widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64)))))
{
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t);
i = list.erase(i);
delete t;
}
else
{
//DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width());
dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16);
//Continue just in case there's a newer target
if (used)
list.MoveFront(i.Index());
break;
}
}
}
} }
} }
else else
@ -2034,6 +2075,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{ {
calcRescale(dst); calcRescale(dst);
GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false);
if (!tex)
return nullptr;
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_gs_device->Recycle(dst->m_texture); g_gs_device->Recycle(dst->m_texture);
@ -2042,6 +2085,67 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_alpha_min = 0; dst->m_alpha_min = 0;
dst->m_alpha_max = 0; dst->m_alpha_max = 0;
} }
else if (!is_shuffle && std::abs(static_cast<s16>(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16)
{
dst->Update(false);
const bool scale_down = GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp > GSLocalMemory::m_psm[TEX0.PSM].bpp;
new_size = dst->m_unscaled_size;
new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale);
dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil();
if (scale_down)
{
if ((new_size.y * 2) < 1024)
{
new_scaled_size.y *= 2;
new_size.y *= 2;
dst->m_valid.y *= 2;
dst->m_valid.w *= 2;
}
dRect.y *= 2;
dRect.w *= 2;
}
else
{
new_scaled_size.y /= 2;
new_size.y /= 2;
dRect.y /= 2;
dRect.w /= 2;
dst->m_valid.y /= 2;
dst->m_valid.w /= 2;
}
GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y,
dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y,
scale);
//DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) :
g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true);
if (!tex)
return nullptr;
m_target_memory_usage += tex->GetMemUsage();
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
if (src && src->m_from_target && src->m_from_target == dst)
{
src->m_texture = dst->m_texture;
src->m_target_direct = false;
src->m_shared_texture = false;
}
else
{
m_target_memory_usage -= dst->m_texture->GetMemUsage();
g_gs_device->Recycle(dst->m_texture);
}
dst->m_TEX0.PSM = TEX0.PSM;
dst->m_texture = tex;
dst->m_unscaled_size = new_size;
}
// If our RGB was invalidated, we need to pull it from depth. // If our RGB was invalidated, we need to pull it from depth.
// Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth. // Terminator 3 will reuse our dst_matched target with the RGB masked, then later use the full ARGB area, so we need to update the depth.
@ -2242,6 +2346,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24; dst->m_valid_alpha_high = dst_match->m_valid_alpha_high; //&& psm_s.trbpp != 24;
dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_valid_rgb = dst_match->m_valid_rgb;
dst->m_was_dst_matched = true; dst->m_was_dst_matched = true;
dst_match->m_was_dst_matched = true;
dst_match->m_valid_rgb = false;
if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16)
dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries).
@ -2569,7 +2675,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
{ {
const GSVector4i save_rect = preserve_target ? newrect : eerect; const GSVector4i save_rect = preserve_target ? newrect : eerect;
if(!hw_clear) if (!hw_clear)
dst->UpdateValidity(save_rect); dst->UpdateValidity(save_rect);
GL_INS("Preloading the RT DATA from updated GS Memory"); GL_INS("Preloading the RT DATA from updated GS Memory");
AddDirtyRectTarget(dst, save_rect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16); AddDirtyRectTarget(dst, save_rect, TEX0.PSM, TEX0.TBW, rgba, GSLocalMemory::m_psm[TEX0.PSM].trbpp >= 16);
@ -2604,8 +2710,8 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
{ {
auto j = i; auto j = i;
Target* t = *j; Target* t = *j;
if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->m_TEX0.TBW > 4) if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM /*&& t->m_TEX0.TBW >= 4*/)
if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid))
{ {
// If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target.
@ -2642,6 +2748,15 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
continue; continue;
} }
const int dst_offset = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y);
if ((dst_offset + t->m_valid.w) > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, (dst_offset + t->m_valid.w), true))
{
// Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU.
// We injected the new height into the cache, so hopefully won't happen again.
continue;
}
const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x;
const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y);
const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; const int dst_offset_scaled_width = dst_offset_width * dst->m_scale;
@ -2657,6 +2772,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
// Clear the dirty first // Clear the dirty first
t->Update(); t->Update();
dst->Update(); dst->Update();
if ((dst_offset + copy_height) > dst->m_unscaled_size.y)
DevCon.Warning("Way too tall draw %d", GSState::s_n);
if ((copy_height) > t->m_unscaled_size.y)
DevCon.Warning("Way too tall for src draw %d", GSState::s_n);
// Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing.
if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale)
{ {
@ -2671,22 +2792,22 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
} }
} }
if ((overlapping_pages < rt_pages) || (src && src->m_target && src->m_from_target == t)) // Probably a shuffle and it shouldn't be creating a new offset target, so let's not destroy the original.
if (src && src->m_target && src->m_from_target == t)
{
i++;
continue;
}
if ((overlapping_pages < rt_pages))
{ {
// This should never happen as we're making a new target so the src should never be something it overlaps, but just in case. // This should never happen as we're making a new target so the src should never be something it overlaps, but just in case.
GSVector4i new_valid = t->m_valid; GSVector4i new_valid = dst->m_valid.runion(GSVector4i(t->m_valid.x, t->m_valid.y + dst_offset, t->m_valid.z, t->m_valid.w + dst_offset));
new_valid.y = std::max(new_valid.y - overlapping_pages_height, 0); dst->UpdateValidity(new_valid);
new_valid.w = std::max(new_valid.w - overlapping_pages_height, 0);
t->m_TEX0.TBP0 += (overlapping_pages_height / GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) << 5;
t->ResizeValidity(new_valid);
} }
else
{ InvalidateSourcesFromTarget(t);
InvalidateSourcesFromTarget(t); i = list.erase(j);
i = list.erase(j); delete t;
delete t;
}
return hw_clear.value_or(false);
} }
} }
i++; i++;
@ -2888,7 +3009,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
} }
// Inject the new size back into the cache. // Inject the new size back into the cache.
GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast<u32>(needed_height)); GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast<u32>(needed_height));
} }
float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert)
@ -3054,6 +3175,17 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
continue; continue;
} }
// Not covering the whole target, and a different format, so just dirty it.
if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM)
{
const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm];
u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5;
GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y);
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true);
++i;
continue;
}
InvalidateSourcesFromTarget(t); InvalidateSourcesFromTarget(t);
t->m_valid_alpha_low &= preserve_alpha; t->m_valid_alpha_low &= preserve_alpha;
@ -3198,8 +3330,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
// But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here. // But this causes rects to be too big, especially in WRC games, I don't think there's any need to align them here.
GSVector4i r = rect; GSVector4i r = rect;
off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) off.loopPages(rect, [this, &rect, bp, bw, psm, &found](u32 page) {
{
auto& list = m_src.m_map[page]; auto& list = m_src.m_map[page];
for (auto i = list.begin(); i != list.end();) for (auto i = list.begin(); i != list.end();)
{ {
@ -3809,7 +3940,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// Make sure the copy doesn't go out of bounds (it shouldn't). // Make sure the copy doesn't go out of bounds (it shouldn't).
if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight())
return false; return false;
GL_CACHE("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW,
psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h);
const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid);
@ -4342,7 +4473,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo
if (s->m_from_hash_cache) if (s->m_from_hash_cache)
s->m_from_hash_cache->refcount++; s->m_from_hash_cache->refcount++;
else if (!s->m_shared_texture) else if (!s->m_shared_texture)
{
DevCon.Warning("replace %d", m_source_memory_usage);
m_source_memory_usage += s->m_texture->GetMemUsage(); m_source_memory_usage += s->m_texture->GetMemUsage();
}
} }
void GSTextureCache::IncAge() void GSTextureCache::IncAge()
@ -4363,7 +4497,7 @@ void GSTextureCache::IncAge()
AgeHashCache(); AgeHashCache();
// As of 04/15/2024 this is set to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered. // As of 04/15/2024 this is set to 60 (just 1 second of targets), which should be fine now as it doesn't destroy targets which haven't been covered.
// //
// For reference, here are some games sensitive to killing old targets: // For reference, here are some games sensitive to killing old targets:
// Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions. // Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions.
// ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it // ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it
@ -4478,7 +4612,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
return nullptr; return nullptr;
} }
m_source_memory_usage += dTex->GetMemUsage(); m_target_memory_usage += dTex->GetMemUsage();
// copy the rt in // copy the rt in
const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy()));
@ -4795,7 +4929,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
return nullptr; return nullptr;
} }
m_source_memory_usage += dTex->GetMemUsage(); src->m_shared_texture = false;
src->m_target_direct = false;
m_target_memory_usage += dTex->GetMemUsage();
src->m_texture = dTex; src->m_texture = dTex;
if (use_texture) if (use_texture)
@ -5250,7 +5386,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height);
return nullptr; return nullptr;
} }
DevCon.Warning("Merged %d", m_source_memory_usage);
m_source_memory_usage += dtex->GetMemUsage(); m_source_memory_usage += dtex->GetMemUsage();
// Sort rect list by the texture, we want to batch as many as possible together. // Sort rect list by the texture, we want to batch as many as possible together.
@ -5596,8 +5732,7 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::LookupPaletteObject(con
void GSTextureCache::Read(Target* t, const GSVector4i& r) void GSTextureCache::Read(Target* t, const GSVector4i& r)
{ {
if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) if ((!t->m_dirty.empty() && !t->m_dirty.GetTotalRect(t->m_TEX0, t->m_unscaled_size).rintersect(r).rempty()) || r.width() == 0 || r.height() == 0)
|| r.width() == 0 || r.height() == 0)
return; return;
const GIFRegTEX0& TEX0 = t->m_TEX0; const GIFRegTEX0& TEX0 = t->m_TEX0;
@ -5818,7 +5953,10 @@ GSTextureCache::Source::~Source()
// to recycle. // to recycle.
if (!m_shared_texture && !m_from_hash_cache && m_texture) if (!m_shared_texture && !m_from_hash_cache && m_texture)
{ {
g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); if(m_from_target)
g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage();
else
g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage();
g_gs_device->Recycle(m_texture); g_gs_device->Recycle(m_texture);
} }
} }
@ -6139,6 +6277,7 @@ GSTextureCache::Target::~Target()
{ {
// Targets should never be shared. // Targets should never be shared.
pxAssert(!m_shared_texture); pxAssert(!m_shared_texture);
if (m_texture) if (m_texture)
{ {
g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage();
@ -6440,7 +6579,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
m_valid = m_valid.rintersect(rect); m_valid = m_valid.rintersect(rect);
m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
// Else No valid size, so need to resize down. // Else No valid size, so need to resize down.
// GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
@ -6453,12 +6596,16 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res
m_valid = rect; m_valid = rect;
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
else if (can_resize) else if (can_resize)
{ {
m_valid = m_valid.runion(rect); m_valid = m_valid.runion(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
} }
@ -6549,8 +6696,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0)
m_surfaces.insert(s); m_surfaces.insert(s);
// The source pointer will be stored/duplicated in all m_map[array of pages] // The source pointer will be stored/duplicated in all m_map[array of pages]
s->m_pages.loopPages([this, s](u32 page) s->m_pages.loopPages([this, s](u32 page) {
{
s->m_erase_it[page] = m_map[page].InsertFront(s); s->m_erase_it[page] = m_map[page].InsertFront(s);
}); });
} }
@ -6593,8 +6739,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s",
s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM));
s->m_pages.loopPages([this, s](u32 page) s->m_pages.loopPages([this, s](u32 page) {
{
m_map[page].EraseIndex(s->m_erase_it[page]); m_map[page].EraseIndex(s->m_erase_it[page]);
}); });
@ -6924,6 +7069,7 @@ void GSTextureCache::Palette::InitializeTexture()
} }
m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0]));
g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage();
} }
} }
@ -7007,7 +7153,7 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
{ {
// Palette is unused // Palette is unused
it = map.erase(it); // Erase element from map it = map.erase(it); // Erase element from map
// The palette object should now be gone as the shared pointer to the object in the map is deleted // The palette object should now be gone as the shared pointer to the object in the map is deleted
} }
else else
{ {
@ -7071,10 +7217,7 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur
{ {
const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i]; const SurfaceOffsetKeyElem& lhs_elem = lhs.elems[i];
const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i]; const SurfaceOffsetKeyElem& rhs_elem = rhs.elems[i];
if (lhs_elem.bp != rhs_elem.bp if (lhs_elem.bp != rhs_elem.bp || lhs_elem.bw != rhs_elem.bw || lhs_elem.psm != rhs_elem.psm || !lhs_elem.rect.eq(rhs_elem.rect))
|| lhs_elem.bw != rhs_elem.bw
|| lhs_elem.psm != rhs_elem.psm
|| !lhs_elem.rect.eq(rhs_elem.rect))
return false; return false;
} }
return true; return true;

View File

@ -491,7 +491,7 @@ public:
Target* FindTargetOverlap(Target* target, int type, int psm); Target* FindTargetOverlap(Target* target, int type, int psm);
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0, Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true,
const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false); const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, bool preserve_scale = false, GSTextureCache::Source* src = nullptr, int offset = -1);
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0, Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true, bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr); const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr);