GS/HW: Further fixes for RT in RT changes in behaviour

This commit is contained in:
refractionpcsx2 2025-01-01 01:01:47 +00:00
parent c2256e77a7
commit f1f11f8285
8 changed files with 430 additions and 123 deletions

View File

@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
{ {
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{ {
C.rb = C.br; C.b = C.r;
float g_temp = C.g; C.a = C.g;
C.g = C.a;
C.a = g_temp;
} }
else if(PS_PROCESS_BA & SHUFFLE_READ) else if(PS_PROCESS_BA & SHUFFLE_READ)
{ {

View File

@ -1086,11 +1086,8 @@ void ps_main()
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#elif PS_SHUFFLE_ACROSS #elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br; C.b = C.r;
float g_temp = C.g; C.a = C.g;
C.g = C.a;
C.a = g_temp;
#elif(PS_PROCESS_BA & SHUFFLE_READ) #elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb; C.rb = C.bb;
C.ga = C.aa; C.ga = C.aa;

View File

@ -1350,11 +1350,8 @@ void main()
// Write RB part. Mask will take care of the correct destination // Write RB part. Mask will take care of the correct destination
#elif PS_SHUFFLE_ACROSS #elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br; C.b = C.r;
float g_temp = C.g; C.a = C.g;
C.g = C.a;
C.a = g_temp;
#elif(PS_PROCESS_BA & SHUFFLE_READ) #elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb; C.rb = C.bb;
C.ga = C.aa; C.ga = C.aa;

View File

@ -3095,6 +3095,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim)
if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk))))
return false; return false;
// Try to detect shuffles; these should not autoflush, since they clash by design.
if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16)
{
// Pretty confident here...
GSVertex* buffer = &m_vertex.buff[0];
const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X);
if (const_spacing)
return false;
}
const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;
const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask);
// There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd.

View File

@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
tex_pos &= 0xFF; tex_pos &= 0xFF;
shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8;
const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8;
process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0;
process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0;
// "same group" means it can read blue and write alpha using C32 tricks // "same group" means it can read blue and write alpha using C32 tricks
@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
GSVector4::storeh(&v[1].ST.S, st); GSVector4::storeh(&v[1].ST.S, st);
} }
} }
m_r = fpr; m_r = r;
m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2; m_index.tail = 2;
return; return;
@ -1040,7 +1040,8 @@ bool GSRendererHW::IsPageCopy() const
if (!PRIM->TME) if (!PRIM->TME)
return false; return false;
const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx;
const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx];
if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20))
return false; return false;
@ -2461,7 +2462,7 @@ void GSRendererHW::Draw()
GIFRegTEX0 TEX0 = {}; GIFRegTEX0 TEX0 = {};
GSTextureCache::Source* src = nullptr; GSTextureCache::Source* src = nullptr;
TextureMinMaxResult tmm; TextureMinMaxResult tmm;
bool possible_shuffle = false;
// Disable texture mapping if the blend is black and using alpha from vertex. // Disable texture mapping if the blend is black and using alpha from vertex.
if (m_process_texture) if (m_process_texture)
{ {
@ -2578,7 +2579,7 @@ void GSRendererHW::Draw()
GIFRegTEX0 FRAME_TEX0; GIFRegTEX0 FRAME_TEX0;
bool shuffle_target = false; bool shuffle_target = false;
if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16)
{ {
if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0)
{ {
@ -2601,16 +2602,22 @@ void GSRendererHW::Draw()
const GSVertex* v = &m_vertex.buff[0]; const GSVertex* v = &m_vertex.buff[0];
const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4;
const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)));
const int second_u = PRIM->FST ? ((v[1].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f);
const bool shuffle_coords = (first_x ^ first_u) & 8; // offset coordinates swap around RG/BA. (Ace Combat)
const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; const u32 minv = m_cached_ctx.CLAMP.MINV;
const u32 minu = m_cached_ctx.CLAMP.MINU;
const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv));
const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle;
// Round up half of second coord, it can sometimes be slightly under.
const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4;
const int read_width = std::abs(second_u - first_u); const int read_width = std::abs(second_u - first_u);
shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1;
} }
} }
const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle());
possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle());
const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM;
const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask();
const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000));
@ -2636,6 +2643,7 @@ void GSRendererHW::Draw()
return; return;
} }
possible_shuffle &= src && (src->m_from_target != nullptr);
// We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // We don't know the alpha range of direct sources when we first tried to optimize the alpha test.
// Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // Moving the texture lookup before the ATST optimization complicates things a lot, so instead,
// recompute it, and everything derived from it again if it changes. // recompute it, and everything derived from it again if it changes.
@ -2762,7 +2770,7 @@ void GSRendererHW::Draw()
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
if (!ds) if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP)
{ {
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
true, 0, false, force_preload, preserve_depth, m_r, src); true, 0, false, force_preload, preserve_depth, m_r, src);
@ -2818,21 +2826,34 @@ void GSRendererHW::Draw()
if (!no_rt) if (!no_rt)
{ {
const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle()); IsPossibleChannelShuffle());
// FBW is going to be wrong for channel shuffling into a new target, so take it from the source. // FBW is going to be wrong for channel shuffling into a new target, so take it from the source.
FRAME_TEX0.U64 = 0; FRAME_TEX0.U64 = 0;
FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block();
FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM;
// Don't clamp on shuffle, the height cache may troll us with the REAL height. // Don't clamp on shuffle, the height cache may troll us with the REAL height.
if (!possible_shuffle && m_split_texture_shuffle_pages == 0) if (!possible_shuffle && m_split_texture_shuffle_pages == 0)
m_r = m_r.rintersect(t_size_rect); m_r = m_r.rintersect(t_size_rect);
// Do the lookup with the real format on a shuffle, if possible.
if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16)
{
// Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format.
const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx;
const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx];
if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM)
FRAME_TEX0.PSM = next_ctx.FRAME.PSM;
else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM)
FRAME_TEX0.PSM = next_ctx.TEX0.PSM;
else
FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later.
}
// Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead
// (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to
// create that target, because the clear isn't black, it'll hang around and never get invalidated. // create that target, because the clear isn't black, it'll hang around and never get invalidated.
@ -2845,7 +2866,7 @@ void GSRendererHW::Draw()
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(),
GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0));
// Draw skipped because it was a clear and there was no target. // Draw skipped because it was a clear and there was no target.
if (!rt) if (!rt)
@ -2882,13 +2903,68 @@ void GSRendererHW::Draw()
} }
else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block())
{ {
GSVertex* v = &m_vertex.buff[0]; int vertical_offset = ((static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast<int>(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it..
int vertical_offset = ((std::abs(static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it..
const int horizontal_offset = (std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; const int horizontal_offset = ((static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast<int>(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x;
// Used to reduce the offset made later in channel shuffles // Used to reduce the offset made later in channel shuffles
m_target_offset = std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); m_target_offset = std::abs(static_cast<int>((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5);
if (vertical_offset < 0)
{
rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block();
GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale;
// Make sure to use the original format for the offset.
int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y);
new_scaled_size.y += new_offset * rt->m_scale;
GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true);
//if (!tex)
// return nullptr;
//m_target_memory_usage += tex->GetMemUsage();
GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y);
g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false);
if (src && src->m_from_target && src->m_from_target == rt)
{
src->m_texture = rt->m_texture;
src->m_target_direct = false;
src->m_shared_texture = false;
}
else
{
//m_target_memory_usage -= dst->m_texture->GetMemUsage();
g_gs_device->Recycle(rt->m_texture);
}
rt->m_valid.y += new_offset;
rt->m_valid.w += new_offset;
rt->m_drawn_since_read.y += new_offset;
rt->m_drawn_since_read.w += new_offset;
rt->m_texture = tex;
rt->m_unscaled_size = new_scaled_size / rt->m_scale;
t_size.y += std::abs(vertical_offset);
vertical_offset = 0;
}
// Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right??
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0))
{
int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
int z_offset = vertical_offset;
GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset);
GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale);
int new_height = std::max(static_cast<int>(ds->m_unscaled_size.y * ds->m_scale), dRect.w);
GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast<float>(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast<float>(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_texture_cache->SetTemporaryZ(tex);
}
GSVertex* v = &m_vertex.buff[0];
for (u32 i = 0; i < m_vertex.tail; i++) for (u32 i = 0; i < m_vertex.tail; i++)
{ {
v[i].XYZ.Y += vertical_offset << 4; v[i].XYZ.Y += vertical_offset << 4;
@ -2915,7 +2991,7 @@ void GSRendererHW::Draw()
// Don't resize if the BPP don't match. // Don't resize if the BPP don't match.
if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp)
{ {
if (t_size.y <= 0) if (m_r.w > rt->m_unscaled_size.y)
{ {
u32 new_height = m_r.w; u32 new_height = m_r.w;
@ -2923,8 +2999,11 @@ void GSRendererHW::Draw()
new_height /= 2; new_height /= 2;
//DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n);
rt->ResizeTexture(rt->m_unscaled_size.x, new_height); rt->ResizeTexture(rt->m_unscaled_size.x, new_height);
rt->UpdateValidity(m_r, true);
rt->UpdateDrawn(m_r, true); const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY));
rt->UpdateValidity(m_r, !frame_masked);
rt->UpdateDrawn(m_r, !frame_masked);
} }
} }
} }
@ -2953,6 +3032,75 @@ void GSRendererHW::Draw()
m_last_channel_shuffle_end_block = 0xFFFF; m_last_channel_shuffle_end_block = 0xFFFF;
} }
// Only run if DS was new and matched the framebuffer.
if (!no_ds && !ds)
{
ZBUF_TEX0.U64 = 0;
ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block();
ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW;
ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM;
ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil,
m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false,
src, -1);
ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW;
// This should never happen, but just to be safe..
if (!ds)
{
ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil,
true, 0, false, force_preload, preserve_depth, m_r, src);
if (!ds) [[unlikely]]
{
GL_INS("ERROR: Failed to create ZBUF target, skipping.");
CleanupDraw(true);
return;
}
}
else
{
// If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture.
if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32)
{
if (ds->m_alpha_max != 0)
{
const u32 max_z = (static_cast<u64>(ds->m_alpha_max + 1) << 24) - 1;
switch (m_cached_ctx.TEST.ZTST)
{
case ZTST_GEQUAL:
// Every Z value will pass
if (max_z <= m_vt.m_min.p.z)
{
m_cached_ctx.TEST.ZTST = ZTST_ALWAYS;
if (zm)
{
ds = nullptr;
no_ds = true;
}
}
break;
case ZTST_GREATER:
// Every Z value will pass
if (max_z < m_vt.m_min.p.z)
{
m_cached_ctx.TEST.ZTST = ZTST_ALWAYS;
if (zm)
{
ds = nullptr;
no_ds = true;
}
}
break;
default:
break;
}
}
}
}
}
if (m_process_texture) if (m_process_texture)
{ {
GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP;
@ -2966,7 +3114,8 @@ void GSRendererHW::Draw()
const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast<int>(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f);
const bool shuffle_coords = (first_x ^ first_u) & 8; const bool shuffle_coords = (first_x ^ first_u) & 8;
const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1;
const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start &&
src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) ||
(m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0));
@ -3207,8 +3356,8 @@ void GSRendererHW::Draw()
// The FBW should also be okay, since it's coming from the source. // The FBW should also be okay, since it's coming from the source.
if (rt) if (rt)
{ {
const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack());
rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW);
rt->m_TEX0.PSM = FRAME_TEX0.PSM; rt->m_TEX0.PSM = FRAME_TEX0.PSM;
} }
if (ds) if (ds)
@ -3217,6 +3366,11 @@ void GSRendererHW::Draw()
ds->m_TEX0.PSM = ZBUF_TEX0.PSM; ds->m_TEX0.PSM = ZBUF_TEX0.PSM;
} }
} }
// Probably grabbed an old 16bit target (Band Hero)
/*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16)
{
rt->m_TEX0.PSM = PSMCT32;
}*/
// Figure out which channels we're writing. // Figure out which channels we're writing.
if (rt) if (rt)
@ -3234,7 +3388,7 @@ void GSRendererHW::Draw()
GSVector2i new_size = t_size; GSVector2i new_size = t_size;
// We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size.
if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0)
{ {
if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y))
{ {
@ -3245,9 +3399,18 @@ void GSRendererHW::Draw()
} }
} }
if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0)
{
new_size.y = std::max(new_size.y, static_cast<int>((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2);
GSVector4i new_valid = rt->m_valid;
new_valid.w = new_size.y;
rt->UpdateValidity(new_valid, true);
}
// We still need to make sure the dimensions of the targets match. // We still need to make sure the dimensions of the targets match.
const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes.
const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)));
const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)));
if (rt) if (rt)
{ {
const u32 old_end_block = rt->m_end_block; const u32 old_end_block = rt->m_end_block;
@ -3259,6 +3422,25 @@ void GSRendererHW::Draw()
if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h)
GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h);
// May not be needed/could cause problems with garbage loaded from GS memory
if (preserve_rt_color)
{
RGBAMask mask;
mask._u32 = 0xF;
if (new_w > rt->m_unscaled_size.x)
{
GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h);
g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask);
}
if (new_h > rt->m_unscaled_size.y)
{
GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h);
g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask);
}
}
rt->ResizeTexture(new_w, new_h); rt->ResizeTexture(new_w, new_h);
if (!m_texture_shuffle && !m_channel_shuffle) if (!m_texture_shuffle && !m_channel_shuffle)
@ -3278,9 +3460,11 @@ void GSRendererHW::Draw()
} }
const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h)));
// If the frame is masked, or the alpha test always fails with an AFAIL mode that never writes the frame, make sure we don't touch it. This might happen if DATE or the alpha test is being used to write to Z.
const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY));
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)));
rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)));
// Probably changing to double buffering, so invalidate any old target that was next to it. // Probably changing to double buffering, so invalidate any old target that was next to it.
// This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing.
// Grandia Xtreme, Onimusha Warlord. // Grandia Xtreme, Onimusha Warlord.
@ -3310,7 +3494,7 @@ void GSRendererHW::Draw()
const bool new_rect = ds->m_valid.rempty(); const bool new_rect = ds->m_valid.rempty();
const bool new_height = new_h > ds->GetUnscaledHeight(); const bool new_height = new_h > ds->GetUnscaledHeight();
const int old_height = ds->m_texture->GetHeight(); const int old_height = ds->m_texture->GetHeight();
const GSVector4i old_rect = ds->GetUnscaledRect();
pxAssert(ds->GetScale() == target_scale); pxAssert(ds->GetScale() == target_scale);
if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h)
GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h);
@ -3323,8 +3507,12 @@ void GSRendererHW::Draw()
} }
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z.
ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect.
const bool z_masked = m_cached_ctx.ZBUF.ZMSK;
ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2)));
ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2)));
if (!new_rect && new_height && old_end_block != ds->m_end_block) if (!new_rect && new_height && old_end_block != ds->m_end_block)
{ {
@ -3423,7 +3611,9 @@ void GSRendererHW::Draw()
{ {
s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM));
if (ds->m_texture) if (g_texture_cache->GetTemporaryZ())
g_texture_cache->GetTemporaryZ()->Save(s);
else if (ds->m_texture)
ds->m_texture->Save(s); ds->m_texture->Save(s);
} }
} }
@ -3512,9 +3702,10 @@ void GSRendererHW::Draw()
if ((fm & fm_mask) != fm_mask && rt) if ((fm & fm_mask) != fm_mask && rt)
{ {
const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY));
//rt->m_valid = rt->m_valid.runion(r); //rt->m_valid = rt->m_valid.runion(r);
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)));
g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false);
@ -3525,15 +3716,31 @@ void GSRendererHW::Draw()
if (zm != 0xffffffff && ds) if (zm != 0xffffffff && ds)
{ {
const bool z_masked = m_cached_ctx.ZBUF.ZMSK;
//ds->m_valid = ds->m_valid.runion(r); //ds->m_valid = ds->m_valid.runion(r);
// Limit to 2x the vertical height of the resolution (for double buffering) // Limit to 2x the vertical height of the resolution (for double buffering)
ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)));
g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false);
// Remove overwritten RTs at the ZBP. // Remove overwritten RTs at the ZBP.
g_texture_cache->InvalidateVideoMemType( g_texture_cache->InvalidateVideoMemType(
GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm);
if (g_texture_cache->GetTemporaryZ())
{
if (m_cached_ctx.DepthWrite())
{
int vertical_offset = ((static_cast<int>(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast<int>(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y;
int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
int z_offset = vertical_offset;
GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset);
GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale);
g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast<float>(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
}
}
} }
// //
@ -4023,7 +4230,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
m_conf.ps.urban_chaos_hle = 1; m_conf.ps.urban_chaos_hle = 1;
} }
} }
else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3)
{ {
// Blood will tell. I think it is channel effect too but again // Blood will tell. I think it is channel effect too but again
// implemented in a different way. I don't want to add more CRC stuff. So // implemented in a different way. I don't want to add more CRC stuff. So
@ -4180,7 +4387,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
// Performance GPU note: it could be wise to reduce the size to // Performance GPU note: it could be wise to reduce the size to
// the rendered size of the framebuffer // the rendered size of the framebuffer
if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy()))
{ {
GSVertex* s = &m_vertex.buff[0]; GSVertex* s = &m_vertex.buff[0];
s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0); s[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + 0);
@ -5604,6 +5811,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu
return false; return false;
} }
// If the texture is offset and the frame isn't also offset, we can't do this.
if (tex->GetRegion().HasX() || tex->GetRegion().HasY())
{
if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0)
return false;
}
// If we're a shuffle, tex-is-fb is always fine. // If we're a shuffle, tex-is-fb is always fine.
if (m_texture_shuffle || m_channel_shuffle) if (m_texture_shuffle || m_channel_shuffle)
{ {
@ -5753,6 +5967,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src)
if (invalidate_temp_src) if (invalidate_temp_src)
g_texture_cache->InvalidateTemporarySource(); g_texture_cache->InvalidateTemporarySource();
g_texture_cache->InvalidateTemporaryZ();
// Restore Scissor. // Restore Scissor.
m_context->UpdateScissor(); m_context->UpdateScissor();
@ -5792,7 +6007,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.cb_vs.texture_offset = {}; m_conf.cb_vs.texture_offset = {};
m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.ps.scanmsk = env.SCANMSK.MSK;
m_conf.rt = rt ? rt->m_texture : nullptr; m_conf.rt = rt ? rt->m_texture : nullptr;
m_conf.ds = ds ? ds->m_texture : nullptr; m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr;
// Z setup has to come before channel shuffle // Z setup has to come before channel shuffle
EmulateZbuffer(ds); EmulateZbuffer(ds);
@ -6163,7 +6378,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing());
// Restrict this to only when we're overwriting the whole target. // Restrict this to only when we're overwriting the whole target.
new_scale_rt_alpha = full_cover; new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n;
} }
} }
@ -7248,7 +7463,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
g_texture_cache->InvalidateContainedTargets( g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress( GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r),
rt_end_bp, m_cached_ctx.FRAME.PSM); rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW);
GSUploadQueue clear_queue; GSUploadQueue clear_queue;
clear_queue.draw = s_n; clear_queue.draw = s_n;
@ -7271,7 +7486,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r
g_texture_cache->InvalidateContainedTargets( g_texture_cache->InvalidateContainedTargets(
GSLocalMemory::GetStartBlockAddress( GSLocalMemory::GetStartBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r),
ds_end_bp, m_cached_ctx.ZBUF.PSM); ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW);
} }
} }

View File

@ -149,7 +149,8 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm
if (rect.rempty()) if (rect.rempty())
return; return;
if (rect.w > 2048)
DevCon.Warning("BAd");
std::vector<GSDirtyRect>::iterator it = target->m_dirty.end(); std::vector<GSDirtyRect>::iterator it = target->m_dirty.end();
while (it != target->m_dirty.begin()) while (it != target->m_dirty.begin())
{ {
@ -274,6 +275,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw
const int inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; const int inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x;
in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0));
in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0));
// Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here.
if (in_rect.x >= (dst_pgw * dst_page_size.x))
{
in_rect.z -= dst_pgw * dst_page_size.x;
in_rect.x -= dst_pgw * dst_page_size.x;
in_rect.y += dst_page_size.y;
in_rect.w += dst_page_size.y;
}
page_offset = 0; page_offset = 0;
single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1;
} }
@ -1448,8 +1458,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
// Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't.
// Also check BP >= TBP, create source isn't equipped to expand it backwards and all data comes from the target. (GH3) // Also check BP >= TBP, create source isn't equipped to expand it backwards and all data comes from the target. (GH3)
else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets &&
(GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups.
t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/)
{ {
if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32))
@ -1481,7 +1491,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
} }
if (bp > t->m_TEX0.TBP0) if (bp > t->m_TEX0.TBP0)
{ {
GSVector4i new_rect = possible_shuffle ? block_boundary_rect : rect; GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? block_boundary_rect : rect;
if (linear) if (linear)
{ {
new_rect.z -= 1; new_rect.z -= 1;
@ -1586,15 +1596,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
// Omitting that check here seemed less risky than blowing CS targets out... // Omitting that check here seemed less risky than blowing CS targets out...
const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs;
const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm);
const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY());
if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() &&
(region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 &&
offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 ||
(offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))
{ {
GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)",
t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(),
(region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x));
x_offset = -region.GetMinX();
y_offset = -region.GetMinY(); x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX();
y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY();
dst = t; dst = t;
tex_merge_rt = false; tex_merge_rt = false;
found_t = true; found_t = true;
@ -1827,7 +1840,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
// TODO: Move all frame stuff to its own routine too. // TODO: Move all frame stuff to its own routine too.
if (!is_frame) if (!is_frame)
{ {
for (auto i = list.begin(); i != list.end(); ++i) for (auto i = list.begin(); i != list.end();)
{ {
Target* t = *i; Target* t = *i;
@ -1838,6 +1851,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
{ {
DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0);
i++;
continue; continue;
} }
@ -1896,21 +1910,26 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
InvalidateSourcesFromTarget(t); InvalidateSourcesFromTarget(t);
i = list.erase(i); i = list.erase(i);
delete t; delete t;
continue;
} }
} }
// Probably pointing to half way through the target // Probably pointing to half way through the target
else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets)
{ {
if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z
/*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset)
{
continue; continue;
}*/
const u32 widthpage_offset = (std::abs(static_cast<int>(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); const u32 widthpage_offset = (std::abs(static_cast<int>(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U);
const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast<u32>(min_rect.width()) <= (widthpage_offset * 64))); const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast<u32>(min_rect.width()) <= (widthpage_offset * 64)));
if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect))
{ { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/
// If it's too old, it's probably not a real target to jump in to anymore. // If it's too old, it's probably not a real target to jump in to anymore.
if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle &&
!(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || !(widthpage_offset == 0 || min_rect.width() <= 64 ||
(widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64)))))
{ {
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
@ -1918,6 +1937,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
i = list.erase(i); i = list.erase(i);
delete t; delete t;
} }
else*/
if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM))
{
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t);
i = list.erase(i);
delete t;
continue;
}
else else
{ {
//DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width());
@ -1931,6 +1960,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
} }
} }
} }
i++;
} }
} }
else else
@ -2085,7 +2116,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_alpha_min = 0; dst->m_alpha_min = 0;
dst->m_alpha_max = 0; dst->m_alpha_max = 0;
} }
else if (!is_shuffle && std::abs(static_cast<s16>(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) else if (std::abs(static_cast<s16>(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16)
{ {
dst->Update(false); dst->Update(false);
@ -2116,34 +2147,38 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_valid.y /= 2; dst->m_valid.y /= 2;
dst->m_valid.w /= 2; dst->m_valid.w /= 2;
} }
GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, if (!is_shuffle)
dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y,
scale);
//DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n);
GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) :
g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true);
if (!tex)
return nullptr;
m_target_memory_usage += tex->GetMemUsage();
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
if (src && src->m_from_target && src->m_from_target == dst)
{ {
src->m_texture = dst->m_texture; GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y,
src->m_target_direct = false; dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y,
src->m_shared_texture = false; scale);
} //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n);
else GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) :
{ g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true);
m_target_memory_usage -= dst->m_texture->GetMemUsage(); if (!tex)
g_gs_device->Recycle(dst->m_texture); return nullptr;
} m_target_memory_usage += tex->GetMemUsage();
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
if (src && src->m_from_target && src->m_from_target == dst)
{
src->m_texture = dst->m_texture;
src->m_target_direct = false;
src->m_shared_texture = false;
}
else
{
m_target_memory_usage -= dst->m_texture->GetMemUsage();
g_gs_device->Recycle(dst->m_texture);
}
dst->m_texture = tex;
dst->m_unscaled_size = new_size;
}
// New format or doing a shuffle to a 32bit target that used to be 16bit
dst->m_TEX0.PSM = TEX0.PSM; dst->m_TEX0.PSM = TEX0.PSM;
dst->m_texture = tex;
dst->m_unscaled_size = new_size;
} }
@ -2347,7 +2382,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_valid_rgb = dst_match->m_valid_rgb;
dst->m_was_dst_matched = true; dst->m_was_dst_matched = true;
dst_match->m_was_dst_matched = true; dst_match->m_was_dst_matched = true;
dst_match->m_valid_rgb = false; dst_match->m_valid_rgb = preserve_rgb;
if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16)
dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries).
@ -2572,7 +2607,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
if (valid_draw_size && supported_fmt) if (valid_draw_size && supported_fmt)
{ {
const GSVector4i newrect = GSVector4i::loadh(size); const GSVector4i newrect = GSVector4i::loadh(valid_size);
const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect);
RGBAMask rgba; RGBAMask rgba;
@ -3167,7 +3202,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo
return true; return true;
} }
void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw)
{ {
const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24);
for (int type = 0; type < 2; type++) for (int type = 0; type < 2; type++)
@ -3176,22 +3211,24 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
for (auto i = list.begin(); i != list.end();) for (auto i = list.begin(); i != list.end();)
{ {
Target* const t = *i; Target* const t = *i;
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty()))
{ {
++i; ++i;
continue; continue;
} }
const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5;
// Not covering the whole target, and a different format, so just dirty it. // Not covering the whole target, and a different format, so just dirty it.
if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW)
{ {
const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm];
u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5);
GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y;
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y));
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true);
++i; ++i;
continue; continue;
} }*/
InvalidateSourcesFromTarget(t); InvalidateSourcesFromTarget(t);
@ -3874,6 +3911,19 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32))
return false; return false;
// This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one.
if (dst && DBP == SBP && dy > dst->m_unscaled_size.y)
{
u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5);
dst = nullptr;
DBP = new_DBP;
dy = 0;
dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP);
}
// Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // Beware of the case where a game might create a larger texture by moving a bunch of chunks around.
if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) if (dst && DBP == SBP && dy > dst->m_unscaled_size.y)
{ {
@ -3960,7 +4010,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// Make sure the copy doesn't go out of bounds (it shouldn't). // Make sure the copy doesn't go out of bounds (it shouldn't).
if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight())
return false; return false;
DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW,
psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h);
const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid);
@ -4086,6 +4136,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u
// Invalidate any sources that overlap with the target (since they're now stale). // Invalidate any sources that overlap with the target (since they're now stale).
InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false);
return true; return true;
} }
@ -4272,7 +4323,7 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type,
{ {
Target* t = *it; Target* t = *it;
if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp)
{ {
rts.MoveFront(it.Index()); rts.MoveFront(it.Index());
return t; return t;
@ -4988,6 +5039,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset,
std::max<u32>(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max<u32>(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex,
std::max<u32>(TEX0.TBW, 1u) * 64, TEX0.PSM); std::max<u32>(TEX0.TBW, 1u) * 64, TEX0.PSM);
src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw);
src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th);
} }
else else
{ {
@ -5139,8 +5193,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
{ {
// We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets
// TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end.
const int tex_width = std::max<int>(64 * TEX0.TBW, region.GetMaxX()); // Round the size up to the next block
const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const int tex_width = (std::max<int>(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1);
const int tex_height = ((region.HasY() ? region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1);
const int scaled_width = static_cast<int>(static_cast<float>(tex_width) * scale); const int scaled_width = static_cast<int>(static_cast<float>(tex_width) * scale);
const int scaled_height = static_cast<int>(static_cast<float>(tex_height) * scale); const int scaled_height = static_cast<int>(static_cast<float>(tex_height) * scale);
@ -6602,7 +6658,9 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
} }
// Else No valid size, so need to resize down. // Else No valid size, so need to resize down.
@ -6612,13 +6670,18 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize)
{ {
if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize)
DevCon.Warning("Here");
if (m_valid.eq(GSVector4i::zero())) if (m_valid.eq(GSVector4i::zero()))
{ {
m_valid = rect; m_valid = rect;
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
} }
else if (can_resize) else if (can_resize)
{ {
@ -6626,7 +6689,9 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
m_end_block += offset;
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
} }
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
} }
@ -6997,6 +7062,29 @@ void GSTextureCache::InvalidateTemporarySource()
m_temporary_source = nullptr; m_temporary_source = nullptr;
} }
/// Remembers temp_z as the cache's temporary Z texture.
/// The pointer is stored as-is; whatever pointer was held before is simply overwritten here.
void GSTextureCache::SetTemporaryZ(GSTexture* temp_z)
{
	this->m_temporary_z = temp_z;
}
/// Returns the temporary Z texture currently held by the cache, or nullptr when none is set.
GSTexture* GSTextureCache::GetTemporaryZ()
{
	// The member is already null when nothing is stored, so it can be handed back directly.
	return m_temporary_z;
}
/// Releases the temporary Z texture, if one is held, and forgets it.
/// Does nothing when no temporary Z texture is currently stored.
void GSTextureCache::InvalidateTemporaryZ()
{
	if (m_temporary_z)
	{
		// Hand the texture back to the device, then clear the pointer so a
		// repeated call finds nothing left to recycle.
		g_gs_device->Recycle(m_temporary_z);
		m_temporary_z = nullptr;
	}
}
void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax) void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax)
{ {
// When we insert we update memory usage. Old texture gets removed below. // When we insert we update memory usage. Old texture gets removed below.

View File

@ -427,6 +427,7 @@ protected:
std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache; std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache;
Source* m_temporary_source = nullptr; // invalidated after the draw Source* m_temporary_source = nullptr; // invalidated after the draw
GSTexture* m_temporary_z = nullptr; // invalidated after the draw
std::unique_ptr<GSDownloadTexture> m_color_download_texture; std::unique_ptr<GSDownloadTexture> m_color_download_texture;
std::unique_ptr<GSDownloadTexture> m_uint16_download_texture; std::unique_ptr<GSDownloadTexture> m_uint16_download_texture;
@ -508,7 +509,7 @@ public:
bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits<u32>::max(), bool move_front = true); bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits<u32>::max(), bool move_front = true);
bool Has32BitTarget(u32 bp); bool Has32BitTarget(u32 bp);
void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1);
void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false);
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
@ -551,6 +552,11 @@ public:
/// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current draw. /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current draw.
void InvalidateTemporarySource(); void InvalidateTemporarySource();
/// Stores temp_z as the temporary Z texture. The previously stored pointer is overwritten without being recycled.
void SetTemporaryZ(GSTexture* temp_z);
/// Returns the currently stored temporary Z texture, or nullptr when none is set.
GSTexture* GetTemporaryZ();
/// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is
/// Recycles the stored texture through g_gs_device and clears the pointer; does nothing when none is set.
void InvalidateTemporaryZ();
/// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred.
void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax); void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax);

View File

@ -1168,11 +1168,8 @@ struct PSMain
{ {
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{ {
C.rb = C.br; C.b = C.r;
float g_temp = C.g; C.a = C.g;
C.g = C.a;
C.a = g_temp;
} }
else if(PS_PROCESS_BA & SHUFFLE_READ) else if(PS_PROCESS_BA & SHUFFLE_READ)
{ {