GS/HW: Further fixes to RT in RT - Still a ways to go...

This commit is contained in:
refractionpcsx2 2024-06-26 12:23:35 +01:00
parent 9d6e500035
commit 8865ad90ff
3 changed files with 124 additions and 52 deletions

View File

@ -225,6 +225,7 @@ public:
bool m_isPackedUV_HackFlag = false; bool m_isPackedUV_HackFlag = false;
bool m_channel_shuffle = false; bool m_channel_shuffle = false;
bool m_in_target_draw = false; bool m_in_target_draw = false;
u32 m_target_offset = 0;
u8 m_scanmask_used = 0; u8 m_scanmask_used = 0;
u32 m_dirty_gs_regs = 0; u32 m_dirty_gs_regs = 0;
int m_backed_up_ctx = 0; int m_backed_up_ctx = 0;

View File

@ -2075,6 +2075,8 @@ void GSRendererHW::Draw()
if (num_skipped_channel_shuffle_draws > 0) if (num_skipped_channel_shuffle_draws > 0)
GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws);
num_skipped_channel_shuffle_draws = 0; num_skipped_channel_shuffle_draws = 0;
m_last_channel_shuffle_fbp = 0xffff;
m_last_channel_shuffle_end_block = 0xffff;
#else #else
if (m_channel_shuffle) if (m_channel_shuffle)
return; return;
@ -2732,18 +2734,21 @@ void GSRendererHW::Draw()
const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM];
m_in_target_draw = false; m_in_target_draw = false;
m_target_offset = 0;
if (!no_rt) if (!no_rt)
{ {
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) &&
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle()); IsPossibleChannelShuffle());
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
// Don't clamp on shuffle, the height cache may troll us with the REAL height. // Don't clamp on shuffle, the height cache may troll us with the REAL height.
if (!possible_shuffle && m_split_texture_shuffle_pages == 0) if (!possible_shuffle && m_split_texture_shuffle_pages == 0)
m_r = m_r.rintersect(t_size_rect); m_r = m_r.rintersect(t_size_rect);
@ -2758,8 +2763,6 @@ void GSRendererHW::Draw()
// Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed.
const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw));
m_in_target_draw = false;
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(),
GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src);
@ -2797,12 +2800,14 @@ void GSRendererHW::Draw()
return; return;
} }
} }
else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt
{ {
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. int vertical_offset = ((std::abs(static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it..
const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; const int horizontal_offset = (std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x;
// Used to reduce the offset made later in channel shuffles
m_target_offset = std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5);
for (u32 i = 0; i < m_vertex.tail; i++) for (u32 i = 0; i < m_vertex.tail; i++)
{ {
@ -2825,6 +2830,16 @@ void GSRendererHW::Draw()
m_vt.m_max.p.y += vertical_offset; m_vt.m_max.p.y += vertical_offset;
t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.x = rt->m_unscaled_size.x - horizontal_offset;
t_size.y = rt->m_unscaled_size.y - vertical_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset;
if (t_size.y <= 0)
{
u32 new_height = m_r.w;
//DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n);
rt->ResizeTexture(rt->m_unscaled_size.x, new_height);
rt->UpdateValidity(m_r, true);
rt->UpdateDrawn(m_r, true);
}
} }
if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt)
@ -2847,6 +2862,8 @@ void GSRendererHW::Draw()
// If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following.
m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block);
} }
else
m_last_channel_shuffle_end_block = 0xFFFF;
} }
GSTextureCache::Target* ds = nullptr; GSTextureCache::Target* ds = nullptr;
@ -2855,12 +2872,14 @@ void GSRendererHW::Draw()
{ {
ZBUF_TEX0.U64 = 0; ZBUF_TEX0.U64 = 0;
ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block();
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW;
ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM;
ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil,
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block());
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
if (!ds) if (!ds)
{ {
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
@ -3151,7 +3170,10 @@ void GSRendererHW::Draw()
rt->m_valid_alpha_high = false; rt->m_valid_alpha_high = false;
} }
if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y))
{
rt->m_TEX0 = FRAME_TEX0; rt->m_TEX0 = FRAME_TEX0;
}
} }
if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw)
@ -3305,7 +3327,7 @@ void GSRendererHW::Draw()
} }
} }
} }
else if (!m_in_target_draw) else
{ {
// RT and DS sizes need to match, even if we're not doing any resizing. // RT and DS sizes need to match, even if we're not doing any resizing.
const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0); const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0);
@ -3982,7 +4004,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
m_conf.ps.urban_chaos_hle = 1; m_conf.ps.urban_chaos_hle = 1;
} }
} }
else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3)
{ {
// Blood will tell. I think it is channel effect too but again // Blood will tell. I think it is channel effect too but again
// implemented in a different way. I don't want to add more CRC stuff. So // implemented in a different way. I don't want to add more CRC stuff. So
@ -4156,16 +4178,24 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
} }
else else
{ {
const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
const u32 frame_page_offset = std::max(static_cast<int>(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0);
m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1));
m_cached_ctx.FRAME.FBP += frame_page_offset;
m_in_target_draw |= frame_page_offset > 0;
GSVertex* s = &m_vertex.buff[0]; GSVertex* s = &m_vertex.buff[0];
s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.x << 4)); s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.x << 4));
s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.z << 4)); s[1].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + (m_r.z << 4));
s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.y << 4)); s[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.y << 4));
s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.w << 4)); s[1].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + (m_r.w << 4));
s[0].U = (m_r.x << 4); const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
s[1].U = (m_r.z << 4); const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y);
s[0].V = (m_r.y << 4); m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5;
s[1].V = (m_r.w << 4); s[0].U = m_r.x << 4;
s[1].U = m_r.z << 4;
s[0].V = m_r.y << 4;
s[1].V = m_r.w << 4;
m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; m_last_channel_shuffle_fbmsk = 0xFFFFFFFF;
} }
@ -5321,7 +5351,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
const int frame_diff = rt ? static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; const int frame_diff = rt ? static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0;
// Detect framebuffer read that will need special handling // Detect framebuffer read that will need special handling
const GSTextureCache::Target* src_target = nullptr; const GSTextureCache::Target* src_target = nullptr;
if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region)))
{ {
// Can we read the framebuffer directly? (i.e. sample location matches up). // Can we read the framebuffer directly? (i.e. sample location matches up).
if (CanUseTexIsFB(rt, tex, tmm)) if (CanUseTexIsFB(rt, tex, tmm))
@ -5379,7 +5409,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
GSVector4i copy_range; GSVector4i copy_range;
GSVector2i copy_size; GSVector2i copy_size;
GSVector2i copy_dst_offset; GSVector2i copy_dst_offset;
bool copied_rt = false;
// Shuffles take the whole target. This should've already been halved. // Shuffles take the whole target. This should've already been halved.
// We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above.
// Restricting it also breaks Tom and Jerry... // Restricting it also breaks Tom and Jerry...
@ -5395,11 +5425,14 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5;
u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y;
u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x;
copy_range.y += vertical_offset; copy_range.y += vertical_offset;
copy_range.x += horizontal_offset; copy_range.x += horizontal_offset;
copy_size.y -= vertical_offset; copy_size.y -= vertical_offset;
copy_size.x -= horizontal_offset; copy_size.x -= horizontal_offset;
target_region = false;
source_region.bits = 0;
//copied_rt = tex->m_from_target != nullptr;
if (m_in_target_draw) if (m_in_target_draw)
{ {
copy_size.x = m_r.width(); copy_size.x = m_r.width();
@ -5495,12 +5528,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale))); static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale)));
src_copy.reset(src_target->m_texture->IsDepthStencil() ? src_copy.reset(src_target->m_texture->IsDepthStencil() ?
g_gs_device->CreateDepthStencil( g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) :
scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : (m_downscale_source || copied_rt) ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) :
(m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true));
true) :
g_gs_device->CreateTexture(
scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)));
if (!src_copy) [[unlikely]] if (!src_copy) [[unlikely]]
{ {
Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y);
@ -5508,6 +5538,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
m_conf.ps.tfx = 4; m_conf.ps.tfx = 4;
return; return;
} }
if (m_downscale_source) if (m_downscale_source)
{ {
g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_perfmon.Put(GSPerfMon::TextureCopies, 1);
@ -7346,7 +7377,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r,
bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw)
{ {
if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0)
{ {
GL_PUSH("OI_BlitFMV"); GL_PUSH("OI_BlitFMV");
@ -7400,7 +7431,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc
g_texture_cache->InvalidateVideoMemSubTarget(_rt); g_texture_cache->InvalidateVideoMemSubTarget(_rt);
return false; // skip current draw return false; // skip current draw
} }*/
// Nothing to see keep going // Nothing to see keep going
return true; return true;

View File

@ -18,6 +18,7 @@
#include "fmt/format.h" #include "fmt/format.h"
#include <cinttypes> #include <cinttypes>
#include <math.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <stdlib.h> #include <stdlib.h>
@ -1094,8 +1095,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1);
block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1);
// Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects.
block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1));
block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1));
// Arc the Lad finds the wrong surface here when looking for a depth stencil. // Arc the Lad finds the wrong surface here when looking for a depth stencil.
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
@ -1117,8 +1118,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1)))
continue; continue;
//const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect);
const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect);
// Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha.
// Also is we have already found a target which we had to offset in to by using a region or exact address, // Also is we have already found a target which we had to offset in to by using a region or exact address,
// it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind) // it's probable that's more correct than being inside (Tomb Raider Legends + Project Snowblind)
@ -1525,7 +1526,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
rect.y -= new_rect.y & ~(page_size.y - 1); rect.y -= new_rect.y & ~(page_size.y - 1);
} }
rect = rect.rintersect(t->m_valid); //rect = rect.rintersect(t->m_valid);
if (rect.rempty()) if (rect.rempty())
continue; continue;
@ -1646,8 +1647,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport)
{ {
GSVector4i new_rect = req_rect;
// Let's try a trick to avoid to use wrongly a depth buffer // Let's try a trick to avoid to use wrongly a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase // Unfortunately, I don't have any Arc the Lad testcase
// //
@ -1836,7 +1835,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
bool can_use = true; bool can_use = true;
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
{
DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0);
continue; continue;
}
// if It's an old target and it's being completely overwritten, kill it. // if It's an old target and it's being completely overwritten, kill it.
// Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But,
@ -1882,12 +1884,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_32_bits_fmt |= (psm_s.bpp != 16); dst->m_32_bits_fmt |= (psm_s.bpp != 16);
if (FindOverlappingTarget(dst)) /*if (FindOverlappingTarget(dst))
continue; continue;
else else*/
break; break;
} }
else else if(!(src && src->m_from_target == t))
{ {
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t); InvalidateSourcesFromTarget(t);
@ -1896,14 +1898,34 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
} }
} }
// Probably pointing to half way through the target // Probably pointing to half way through the target
else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets)
{ {
//DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); const u32 widthpage_offset = (std::abs(static_cast<int>(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U);
dst = t; const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast<u32>(min_rect.width()) <= (widthpage_offset * 64)));
if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect))
{
// If it's too old, it's probably not a real target to jump in to anymore.
if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle &&
!(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 ||
(widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64)))))
{
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t);
i = list.erase(i);
delete t;
}
else
{
//DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width());
dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16); dst->m_32_bits_fmt |= (psm_s.bpp != 16);
//Continue just in case there's a newer target //Continue just in case there's a newer target
continue; if (used)
list.MoveFront(i.Index());
break;
}
}
} }
} }
} }
@ -2049,6 +2071,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{ {
calcRescale(dst); calcRescale(dst);
GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false);
if (!tex)
return nullptr;
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_gs_device->Recycle(dst->m_texture); g_gs_device->Recycle(dst->m_texture);
@ -2089,10 +2113,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y,
dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y,
scale); scale);
DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) :
g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true);
if (!tex)
return nullptr;
m_target_memory_usage += tex->GetMemUsage(); m_target_memory_usage += tex->GetMemUsage();
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
@ -2983,7 +3008,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
} }
// Inject the new size back into the cache. // Inject the new size back into the cache.
GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast<u32>(needed_height)); GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast<u32>(needed_height));
} }
float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert)
@ -4452,7 +4477,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo
if (s->m_from_hash_cache) if (s->m_from_hash_cache)
s->m_from_hash_cache->refcount++; s->m_from_hash_cache->refcount++;
else if (!s->m_shared_texture) else if (!s->m_shared_texture)
{
DevCon.Warning("replace %d", m_source_memory_usage);
m_source_memory_usage += s->m_texture->GetMemUsage(); m_source_memory_usage += s->m_texture->GetMemUsage();
}
} }
void GSTextureCache::IncAge() void GSTextureCache::IncAge()
@ -4588,7 +4616,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
return nullptr; return nullptr;
} }
m_source_memory_usage += dTex->GetMemUsage(); m_target_memory_usage += dTex->GetMemUsage();
// copy the rt in // copy the rt in
const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy()));
@ -4905,7 +4933,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
return nullptr; return nullptr;
} }
m_source_memory_usage += dTex->GetMemUsage(); src->m_shared_texture = false;
src->m_target_direct = false;
m_target_memory_usage += dTex->GetMemUsage();
src->m_texture = dTex; src->m_texture = dTex;
if (use_texture) if (use_texture)
@ -5360,7 +5390,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height);
return nullptr; return nullptr;
} }
DevCon.Warning("Merged %d", m_source_memory_usage);
m_source_memory_usage += dtex->GetMemUsage(); m_source_memory_usage += dtex->GetMemUsage();
// Sort rect list by the texture, we want to batch as many as possible together. // Sort rect list by the texture, we want to batch as many as possible together.
@ -6251,6 +6281,7 @@ GSTextureCache::Target::~Target()
{ {
// Targets should never be shared. // Targets should never be shared.
pxAssert(!m_shared_texture); pxAssert(!m_shared_texture);
if (m_texture) if (m_texture)
{ {
g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage();
@ -6552,7 +6583,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
m_valid = m_valid.rintersect(rect); m_valid = m_valid.rintersect(rect);
m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
// Else No valid size, so need to resize down. // Else No valid size, so need to resize down.
// GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
@ -6565,12 +6600,16 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res
m_valid = rect; m_valid = rect;
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
else if (can_resize) else if (can_resize)
{ {
m_valid = m_valid.runion(rect); m_valid = m_valid.runion(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
} }
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
} }
@ -7034,6 +7073,7 @@ void GSTextureCache::Palette::InitializeTexture()
} }
m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0]));
g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage();
} }
} }