GS/HW: More RT in RT regression fixes

refractionpcsx2 2025-03-04 19:10:41 +00:00 committed by Ty
parent 13ee2abeef
commit a6d5598c08
12 changed files with 316 additions and 138 deletions

View File

@@ -1127,7 +1127,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
{
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.br = C.rb;
C.ag = C.ga;
}
else if(PS_PROCESS_BA & SHUFFLE_READ)

View File

@@ -1095,7 +1095,7 @@ void ps_main()
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.br = C.rb;
C.ag = C.ga;
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;

View File

@@ -1343,8 +1343,6 @@ void main()
#endif
#endif
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_PROCESS_BA & SHUFFLE_READ)
@@ -1362,7 +1360,7 @@ void main()
// Write RB part. Mask will take care of the correct destination
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.br = C.rb;
C.ag = C.ga;
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;

View File

@@ -435,6 +435,15 @@ void GSgifTransfer3(u8* mem, u32 size)
void GSvsync(u32 field, bool registers_written)
{
// Update this here because we need to check if the pending draw affects the current frame, so our regs need to be updated.
g_gs_renderer->PCRTCDisplays.SetVideoMode(g_gs_renderer->GetVideoMode());
g_gs_renderer->PCRTCDisplays.EnableDisplays(g_gs_renderer->m_regs->PMODE, g_gs_renderer->m_regs->SMODE2, g_gs_renderer->isReallyInterlaced());
g_gs_renderer->PCRTCDisplays.CheckSameSource();
g_gs_renderer->PCRTCDisplays.SetRects(0, g_gs_renderer->m_regs->DISP[0].DISPLAY, g_gs_renderer->m_regs->DISP[0].DISPFB);
g_gs_renderer->PCRTCDisplays.SetRects(1, g_gs_renderer->m_regs->DISP[1].DISPLAY, g_gs_renderer->m_regs->DISP[1].DISPFB);
g_gs_renderer->PCRTCDisplays.CalculateDisplayOffset(g_gs_renderer->m_scanmask_used);
g_gs_renderer->PCRTCDisplays.CalculateFramebufferOffset(g_gs_renderer->m_scanmask_used);
// Do not move the flush into the VSync() method. It's here because EE transfers
// get cleared in HW VSync, and may be needed for a buffered draw (FFX FMVs).
g_gs_renderer->Flush(GSState::VSYNC);

View File

@@ -1477,6 +1477,35 @@ void GSState::Flush(GSFlushReason reason)
if (m_index.tail > 0)
{
// Unless Vsync really needs the pending draw, don't do it when VSync happens as it can really screw up our heuristics when looking ahead.
if (reason == VSYNC)
{
GSDrawingContext* draw_ctx = &m_prev_env.CTXT[m_prev_env.PRIM.CTXT];
const u32 start_bp = GSLocalMemory::GetStartBlockAddress(draw_ctx->FRAME.Block(), draw_ctx->FRAME.FBW, draw_ctx->FRAME.PSM, temp_draw_rect);
const u32 end_bp = GSLocalMemory::GetEndBlockAddress(draw_ctx->FRAME.Block(), draw_ctx->FRAME.FBW, draw_ctx->FRAME.PSM, temp_draw_rect);
bool needs_flush[2] = {PCRTCDisplays.PCRTCDisplays[0].enabled, PCRTCDisplays.PCRTCDisplays[1].enabled};
if (PCRTCDisplays.PCRTCDisplays[1].enabled)
{
const u32 out_start_bp = GSLocalMemory::GetStartBlockAddress(PCRTCDisplays.PCRTCDisplays[1].Block(), PCRTCDisplays.PCRTCDisplays[1].FBW, PCRTCDisplays.PCRTCDisplays[1].PSM, PCRTCDisplays.PCRTCDisplays[1].framebufferRect);
const u32 out_end_bp = GSLocalMemory::GetEndBlockAddress(PCRTCDisplays.PCRTCDisplays[1].Block(), PCRTCDisplays.PCRTCDisplays[1].FBW, PCRTCDisplays.PCRTCDisplays[1].PSM, PCRTCDisplays.PCRTCDisplays[1].framebufferRect);
if (out_start_bp > end_bp || out_end_bp < start_bp)
needs_flush[1] = false;
}
if (PCRTCDisplays.PCRTCDisplays[0].enabled)
{
const u32 out_start_bp = GSLocalMemory::GetStartBlockAddress(PCRTCDisplays.PCRTCDisplays[0].Block(), PCRTCDisplays.PCRTCDisplays[0].FBW, PCRTCDisplays.PCRTCDisplays[0].PSM, PCRTCDisplays.PCRTCDisplays[0].framebufferRect);
const u32 out_end_bp = GSLocalMemory::GetEndBlockAddress(PCRTCDisplays.PCRTCDisplays[0].Block(), PCRTCDisplays.PCRTCDisplays[0].FBW, PCRTCDisplays.PCRTCDisplays[0].PSM, PCRTCDisplays.PCRTCDisplays[0].framebufferRect);
if (out_start_bp > end_bp || out_end_bp < start_bp)
needs_flush[0] = false;
}
if (!needs_flush[0] && !needs_flush[1])
return;
}
m_state_flush_reason = reason;
// Used to prompt the current draw that it's modifying its own CLUT.
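The early-return added above reduces to an inclusive block-range overlap test between the pending draw and each enabled output circuit. A minimal standalone sketch of that test, with a hypothetical BlockRangesOverlap helper and plain u32 block addresses in place of the real GSLocalMemory calls:

#include <cstdint>

using u32 = std::uint32_t;

// Two inclusive block ranges overlap unless one ends before the other starts.
// This mirrors the "out_start_bp > end_bp || out_end_bp < start_bp" rejection above.
static bool BlockRangesOverlap(u32 draw_start_bp, u32 draw_end_bp, u32 out_start_bp, u32 out_end_bp)
{
    return !(out_start_bp > draw_end_bp || out_end_bp < draw_start_bp);
}

int main()
{
    // Pending draw spans blocks [0x1000, 0x13FF]; the circuit scans out [0x2000, 0x23FF].
    const bool circuit_needs_flush = BlockRangesOverlap(0x1000, 0x13FF, 0x2000, 0x23FF);
    return circuit_needs_flush ? 1 : 0; // no overlap, so the vsync flush can be skipped
}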
@@ -1942,10 +1971,10 @@ void GSState::Write(const u8* mem, int len)
m_draw_transfers.push_back(new_transfer);
}
GL_CACHE("Write! %u ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)", s_transfer_n,
GL_CACHE("Write! %u ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d) draw %d", s_transfer_n,
blit.DBP, blit.DBW, psm_str(blit.DPSM),
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h);
m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h, s_n);
if (len >= m_tr.total)
{
@@ -3093,7 +3122,7 @@ void GSState::CalculatePrimitiveCoversWithoutGaps()
}
else if (m_vt.m_primclass == GS_TRIANGLE_CLASS)
{
m_primitive_covers_without_gaps = ((m_index.tail % 6) == 0 && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound;
m_primitive_covers_without_gaps = ((m_index.tail == 6 || ((m_index.tail % 6) == 0 && m_primitive_covers_without_gaps == FullCover)) && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound;
return;
}
else if (m_vt.m_primclass != GS_SPRITE_CLASS)
@@ -3123,7 +3152,7 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim)
{
// Pretty confident here...
GSVertex* buffer = &m_vertex.buff[0];
const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64;
const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) <= 256; // Lequal to 16 pixels apart.
if (const_spacing)
return false;
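For context on the 256 threshold above: GS vertex X coordinates (and U) carry a 4-bit subpixel fraction, so 256 units is 16 pixels and the old limit of 64 allowed spacing under 4 pixels. A tiny sketch of that conversion, with a hypothetical ToPixels helper:

#include <cassert>

// GS vertex XYZ.X (and U) use a 4-bit subpixel fraction, i.e. 1/16-unit fixed point.
constexpr int kGsSubpixelBits = 4;

constexpr int ToPixels(int fixed_point) { return fixed_point >> kGsSubpixelBits; }

int main()
{
    assert(ToPixels(256) == 16); // the new "<= 256" spacing test allows up to 16 pixels apart
    assert(ToPixels(64) == 4);   // the old "< 64" test only allowed spacing under 4 pixels
    return 0;
}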
@@ -4728,10 +4757,16 @@ GSVector2i GSState::GSPCRTCRegs::GetFramebufferSize(int display)
void GSState::GSPCRTCRegs::SetRects(int display, GSRegDISPLAY displayReg, GSRegDISPFB framebufferReg)
{
// Save framebuffer information first, while we're here.
PCRTCDisplays[display].prevFramebufferReg.FBP = PCRTCDisplays[display].FBP;
PCRTCDisplays[display].prevFramebufferReg.FBW = PCRTCDisplays[display].FBW;
PCRTCDisplays[display].prevFramebufferReg.PSM = PCRTCDisplays[display].PSM;
PCRTCDisplays[display].prevFramebufferReg.DBX = PCRTCDisplays[display].DBX;
PCRTCDisplays[display].prevFramebufferReg.DBY = PCRTCDisplays[display].DBY;
PCRTCDisplays[display].FBP = framebufferReg.FBP;
PCRTCDisplays[display].FBW = framebufferReg.FBW;
PCRTCDisplays[display].PSM = framebufferReg.PSM;
PCRTCDisplays[display].prevFramebufferReg = framebufferReg;
PCRTCDisplays[display].DBX = framebufferReg.DBX;
PCRTCDisplays[display].DBY = framebufferReg.DBY;
// Probably not really enabled but will cause a mess.
// Q-Ball Billiards enables both circuits but doesn't set one of them up.
if (PCRTCDisplays[display].FBW == 0 && displayReg.DW == 0 && displayReg.DH == 0 && displayReg.MAGH == 0)

View File

@@ -323,6 +323,8 @@ public:
int FBP;
int FBW;
int PSM;
int DBY;
int DBX;
GSRegDISPFB prevFramebufferReg;
GSVector2i prevDisplayOffset;
GSVector2i displayOffset;

View File

@@ -87,10 +87,6 @@ bool GSRenderer::Merge(int field)
int y_offset[3] = { 0, 0, 0 };
const bool feedback_merge = m_regs->EXTWRITE.WRITE == 1;
PCRTCDisplays.SetVideoMode(GetVideoMode());
PCRTCDisplays.EnableDisplays(m_regs->PMODE, m_regs->SMODE2, isReallyInterlaced());
PCRTCDisplays.CheckSameSource();
if (!PCRTCDisplays.PCRTCDisplays[0].enabled && !PCRTCDisplays.PCRTCDisplays[1].enabled)
{
m_real_size = GSVector2i(0, 0);
@@ -101,11 +97,6 @@ bool GSRenderer::Merge(int field)
const bool game_deinterlacing = (m_regs->DISP[0].DISPFB.DBY != PCRTCDisplays.PCRTCDisplays[0].prevFramebufferReg.DBY) !=
(m_regs->DISP[1].DISPFB.DBY != PCRTCDisplays.PCRTCDisplays[1].prevFramebufferReg.DBY);
PCRTCDisplays.SetRects(0, m_regs->DISP[0].DISPLAY, m_regs->DISP[0].DISPFB);
PCRTCDisplays.SetRects(1, m_regs->DISP[1].DISPLAY, m_regs->DISP[1].DISPFB);
PCRTCDisplays.CalculateDisplayOffset(m_scanmask_used);
PCRTCDisplays.CalculateFramebufferOffset(m_scanmask_used);
// Only need to check the right/bottom on software renderer, hardware always gets the full texture then cuts a bit out later.
if (PCRTCDisplays.FrameRectMatch() && !PCRTCDisplays.FrameWrap() && !feedback_merge)
{

View File

@@ -587,6 +587,12 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
v[i + reversed_pos].XYZ.X -= 128u;
v[i + 1 - reversed_pos].XYZ.X -= 128u;
}
// Needed for when there's no barriers.
if (v[i + reversed_U].U & 128)
{
v[i + reversed_U].U -= 128u;
v[i + 1 - reversed_U].U -= 128u;
}
}
if (half_bottom_vert)
@@ -649,6 +655,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba,
else
v[i + 1 - reversed_S].ST.S += offset_8pix;
}
else
{
if (static_cast<int>(v[i + reversed_S].ST.S * tw) & 8)
{
v[i + reversed_S].ST.S -= offset_8pix;
v[i + 1 - reversed_S].ST.S -= offset_8pix;
}
}
if (half_bottom_vert)
{
@@ -2481,7 +2495,7 @@ void GSRendererHW::Draw()
}
// We trigger the sw prim render here super early, to avoid creating superfluous render targets.
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true))
if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex && m_process_texture) && SwPrimRender(*this, true, true))
{
GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)",
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW);
@@ -2643,27 +2657,34 @@ void GSRendererHW::Draw()
// Try to fix large single-page-wide draws.
bool height_invalid = m_r.w >= 1024;
const GSVector2i& pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs;
if (height_invalid && m_cached_ctx.FRAME.FBW <= 1 &&
TryToResolveSinglePageFramebuffer(m_cached_ctx.FRAME, true))
{
const GSVector2i& pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs;
ReplaceVerticesWithSprite(
GetDrawRectForPages(m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, (m_r.w + (pgs.y - 1)) / pgs.y),
GSVector2i(1, 1));
height_invalid = false;
}
const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color);
const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth);
// Be careful of being 1 pixel from filled.
const bool page_aligned = (m_r.w % pgs.y) == (pgs.y - 1) || (m_r.w % pgs.y) == 0;
const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color && page_aligned);
const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth && page_aligned);
// If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets.
// If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth.
if (is_zero_color_clear || is_zero_depth_clear || height_invalid)
{
const u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r);
const u32 ds_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress(
m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r);
// This can get missed by the double half clear, but we can make sure we nuke everything inside if the Z is butted up against the FRAME.
if (!no_ds && (rt_end_bp + 1) == m_cached_ctx.ZBUF.Block() && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp)
rt_end_bp = ds_end_bp;
// If this is a partial clear of a larger buffer, we can't invalidate the target, since we'll be losing data
// which only existed on the GPU. Assume a BW change is a new target, though. Test case: Persona 3 shadows.
GSTextureCache::Target* tgt;
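The FRAME/ZBUF merge above relies on block addresses being contiguous: if the depth buffer starts on the block right after the last frame block and both formats share the same bit depth, the two clears cover one contiguous region. A rough standalone sketch of that check, with a hypothetical MergeClearRange helper and plain integers in place of the register types:

#include <cstdint>

using u32 = std::uint32_t;

// If the Z buffer begins on the block right after the frame buffer's last block and the
// two formats have the same bpp, treat both clears as one contiguous block range.
static u32 MergeClearRange(u32 rt_end_bp, u32 zbuf_start_bp, u32 ds_end_bp, int frame_bpp, int z_bpp)
{
    if ((rt_end_bp + 1) == zbuf_start_bp && frame_bpp == z_bpp)
        return ds_end_bp; // extend the invalidation to cover the Z buffer too
    return rt_end_bp;
}

int main()
{
    // Frame ends at block 0x0FFF, Z starts at 0x1000 and ends at 0x1FFF, both 32bpp.
    return (MergeClearRange(0x0FFF, 0x1000, 0x1FFF, 32, 32) == 0x1FFF) ? 0 : 1;
}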
@@ -2893,7 +2914,10 @@ void GSRendererHW::Draw()
// TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it.
// Example games: Evolution Snowboarding, Final Fantasy Dirge of Cerberus, Red Dead Revolver, Stuntman, Tony Hawk's Underground 2, Ultimate Spider-Man.
if (!req_color && !alpha_used)
{
m_process_texture = false;
possible_shuffle = false;
}
else
{
src = tex_psm.depth ? g_texture_cache->LookupDepthSource(true, TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) :
@@ -2971,31 +2995,36 @@ void GSRendererHW::Draw()
const GSVector4i unclamped_draw_rect = m_r;
float target_scale = GetTextureScaleFactor();
bool scaled_copy = false;
int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound);
if (target_scale > 1.0f && scale_draw > 0)
if (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off)
{
// 1 == Downscale, so we need to reduce the size of the target also.
// 2 == Upscale, so likely putting it over the top of the render target.
if (scale_draw == 1)
if (target_scale > 1.0f && scale_draw > 0)
{
target_scale = 1.0f;
m_downscale_source = src->m_from_target->GetScale() > 1.0f;
// 1 == Downscale, so we need to reduce the size of the target also.
// 2 == Upscale, so likely putting it over the top of the render target.
if (scale_draw == 1)
{
target_scale = 1.0f;
m_downscale_source = src->m_from_target->GetScale() > 1.0f;
}
else
m_downscale_source = GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa.
}
else
m_downscale_source = GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa.
}
else
{
// if it's directly copying keep the scale - Ratchet and clank hits this, stops edge garbage happening.
// Keep it to small targets of 256 or lower.
if (scale_draw == -1 && src && src->m_from_target && src->m_from_target->m_downscaled && static_cast<int>(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) &&
(GSVector4i(m_vt.m_min.p).xyxy() == GSVector4i(m_vt.m_min.t).xyxy()).alltrue() && (GSVector4i(m_vt.m_max.p).xyxy() == GSVector4i(m_vt.m_max.t).xyxy()).alltrue())
{
target_scale = src->m_from_target->GetScale();
scale_draw = 1;
}
// if it's directly copying keep the scale - Ratchet and clank hits this, stops edge garbage happening.
// Keep it to small targets of 256 or lower.
if (scale_draw == -1 && src && src->m_from_target && src->m_from_target->m_downscaled && static_cast<int>(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) &&
(GSVector4i(m_vt.m_min.p).xyxy() == GSVector4i(m_vt.m_min.t).xyxy()).alltrue() && (GSVector4i(m_vt.m_max.p).xyxy() == GSVector4i(m_vt.m_max.t).xyxy()).alltrue())
{
target_scale = src->m_from_target->GetScale();
scale_draw = 1;
scaled_copy = true;
}
m_downscale_source = false;
m_downscale_source = false;
}
}
if (IsPossibleChannelShuffle() && src && src->m_from_target && src->m_from_target->GetScale() != target_scale)
@@ -3101,7 +3130,7 @@ void GSRendererHW::Draw()
if (!no_rt)
{
possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) &&
possible_shuffle |= draw_sprite_tex && m_process_texture && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) &&
(GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 || draw_uses_target) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle());
@@ -3157,7 +3186,7 @@ void GSRendererHW::Draw()
// Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size.
// Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed.
const bool preserve_downscale_draw = std::abs(scale_draw) == 1 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw));
const bool preserve_downscale_draw = (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && (std::abs(scale_draw) == 1 || (scale_draw == 0 && src && src->m_from_target && src->m_from_target->m_downscaled))) || is_possible_mem_clear == ClearType::ClearWithDraw;
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(),
@@ -3198,7 +3227,7 @@ void GSRendererHW::Draw()
return;
}
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale,
rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale,
GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src);
if (!rt) [[unlikely]]
@@ -3245,7 +3274,7 @@ void GSRendererHW::Draw()
if (rt->m_dirty.size())
{
for (int i = 0; i < rt->m_dirty.size(); i++)
for (int i = 0; i < static_cast<int>(rt->m_dirty.size()); i++)
{
rt->m_dirty[i].r.y += new_offset;
rt->m_dirty[i].r.w += new_offset;
@@ -3266,34 +3295,6 @@ void GSRendererHW::Draw()
if (vertical_offset || horizontal_offset)
{
// Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right??
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0))
{
const int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
if (g_texture_cache->GetTemporaryZ() != nullptr)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
if (ds->m_TEX0.TBP0 != z_address_info.ZBP || z_address_info.offset != (vertical_offset - z_vertical_offset))
g_texture_cache->InvalidateTemporaryZ();
}
if (g_texture_cache->GetTemporaryZ() == nullptr)
{
m_temp_z_full_copy = false;
u32 vertical_size = std::max(rt->m_unscaled_size.y, ds->m_unscaled_size.y);
GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, (vertical_offset + ds->m_unscaled_size.y - z_vertical_offset) * ds->m_scale);
const int new_height = std::max(static_cast<int>(vertical_size * ds->m_scale), dRect.w);
GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast<float>(ds->m_unscaled_size.y), 1.0f, (ds->m_unscaled_size.y - z_vertical_offset) / static_cast<float>(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_texture_cache->SetTemporaryZ(tex);
g_texture_cache->SetTemporaryZInfo(ds->m_TEX0.TBP0, vertical_offset - z_vertical_offset);
}
m_using_temp_z = true;
}
GSVertex* v = &m_vertex.buff[0];
for (u32 i = 0; i < m_vertex.tail; i++)
@@ -3336,6 +3337,36 @@ void GSRendererHW::Draw()
t_size.x = rt->m_unscaled_size.x - horizontal_offset;
t_size.y = rt->m_unscaled_size.y - vertical_offset;
// Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right??
if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0))
{
const int z_vertical_offset = ((static_cast<int>(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y;
if (g_texture_cache->GetTemporaryZ() != nullptr)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
if (ds->m_TEX0.TBP0 != z_address_info.ZBP || z_address_info.offset != static_cast<u32>(vertical_offset - z_vertical_offset))
g_texture_cache->InvalidateTemporaryZ();
}
if (g_texture_cache->GetTemporaryZ() == nullptr)
{
m_temp_z_full_copy = false;
u32 vertical_size = std::max(rt->m_unscaled_size.y, ds->m_unscaled_size.y);
GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset);
GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, (vertical_offset + ds->m_unscaled_size.y - z_vertical_offset) * ds->m_scale);
const int new_height = std::max(static_cast<int>(vertical_size * ds->m_scale), dRect.w);
GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast<float>(ds->m_unscaled_size.y), 1.0f, (ds->m_unscaled_size.y - z_vertical_offset) / static_cast<float>(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_texture_cache->SetTemporaryZ(tex);
g_texture_cache->SetTemporaryZInfo(ds->m_TEX0.TBP0, vertical_offset - z_vertical_offset);
t_size.y = std::max(new_height, t_size.y);
}
m_using_temp_z = true;
}
}
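The z_vertical_offset arithmetic above converts a block-pointer difference into pixels: 32 blocks per page, TBW pages per row, and one page row equals the format's page height. A small standalone sketch of that conversion, with a hypothetical BlockOffsetToPixelRows helper:

#include <algorithm>
#include <cstdint>

using u32 = std::uint32_t;

// Mirror of the z_vertical_offset computation: blocks -> pages (/ 32), pages -> page rows
// (/ TBW), page rows -> pixels (* page height, e.g. 32 for PSMZ32/PSMCT32).
static int BlockOffsetToPixelRows(u32 zbuf_bp, u32 target_bp0, u32 tbw, int page_height)
{
    const u32 page_offset = (zbuf_bp - target_bp0) / 32;
    const u32 row_offset = page_offset / std::max<u32>(tbw, 1);
    return static_cast<int>(row_offset) * page_height;
}

int main()
{
    // A Z buffer sitting 320 blocks (10 pages) into a 10-page-wide target with 32px-high
    // pages is exactly one page row, i.e. 32 pixels, below the target base.
    return (BlockOffsetToPixelRows(0x140, 0x0, 10, 32) == 32) ? 0 : 1;
}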
// Don't resize if the BPP don't match.
if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp)
@@ -3373,7 +3404,7 @@ void GSRendererHW::Draw()
}
}
}
if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt)
{
src->m_texture = rt->m_texture;
@@ -3392,6 +3423,7 @@ void GSRendererHW::Draw()
// Slightly abusing the texture resize.
ds->m_scale = target_scale;
ds->m_unscaled_size = unscaled_size;
ds->m_downscaled = rt->m_downscaled;
}
// The target might have previously been a C32 format with valid alpha. If we're switching to C24, we need to preserve it.
preserve_rt_alpha |= (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == 24 && rt->HasValidAlpha());
@@ -3713,17 +3745,16 @@ void GSRendererHW::Draw()
if (m_cached_ctx.FRAME.FBMSK & 0xF0000000)
rt->m_valid_alpha_high = false;
}
if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y))
if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y) || (scale_draw == 1 && !scaled_copy))
{
FRAME_TEX0.TBP0 = rt->m_TEX0.TBP0;
rt->m_TEX0 = FRAME_TEX0;
}
}
if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw)
{
if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y))
if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y) || (scale_draw == 1 && !scaled_copy))
{
ZBUF_TEX0.TBP0 = ds->m_TEX0.TBP0;
ds->m_TEX0 = ZBUF_TEX0;
@@ -3821,8 +3852,10 @@ void GSRendererHW::Draw()
// We still need to make sure the dimensions of the targets match.
// Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes.
const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)));
const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)));
GSVector2i ds_size = m_using_temp_z ? GSVector2i(g_texture_cache->GetTemporaryZ()->GetSize() / ds->m_scale) : (ds ? ds->m_unscaled_size : GSVector2i(0,0));
const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds_size.x : 0)));
const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds_size.y : 0)));
if (rt)
{
const u32 old_end_block = rt->m_end_block;
@@ -3915,6 +3948,23 @@ void GSRendererHW::Draw()
ds->ResizeTexture(new_w, new_h);
if (m_using_temp_z)
{
const int z_width = g_texture_cache->GetTemporaryZ()->GetWidth() / ds->m_scale;
const int z_height = g_texture_cache->GetTemporaryZ()->GetHeight() / ds->m_scale;
if (z_width != new_w || z_height != new_h)
{
GSVector4i dRect = GSVector4i(0, 0, g_texture_cache->GetTemporaryZ()->GetWidth(), g_texture_cache->GetTemporaryZ()->GetHeight());
GSTexture* tex = g_gs_device->CreateDepthStencil(new_w * ds->m_scale, new_h * ds->m_scale, GSTexture::Format::DepthStencil, true);
g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, 0.0f, 1.0f, 1.0f), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_texture_cache->InvalidateTemporaryZ();
g_texture_cache->SetTemporaryZ(tex);
}
}
if (!m_texture_shuffle && !m_channel_shuffle)
{
ds->ResizeValidity(ds->GetUnscaledRect());
@@ -4111,7 +4161,7 @@ void GSRendererHW::Draw()
}
// Nothing to do if no texture is sampled
if (PRIM->FST && draw_sprite_tex)
if (PRIM->FST && draw_sprite_tex && m_process_texture)
{
if ((GSConfig.UserHacks_RoundSprite > 1) || (GSConfig.UserHacks_RoundSprite == 1 && !m_vt.IsLinear()))
{
@@ -4184,7 +4234,7 @@ void GSRendererHW::Draw()
{
const int get_next_ctx = m_env.PRIM.CTXT;
const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx];
if ((m_state_flush_reason != CONTEXTCHANGE) || next_ctx.ZBUF.ZBP == m_context->ZBUF.ZBP && next_ctx.FRAME.FBP == m_context->FRAME.FBP)
if ((m_state_flush_reason != CONTEXTCHANGE) || (next_ctx.ZBUF.ZBP == m_context->ZBUF.ZBP && next_ctx.FRAME.FBP == m_context->FRAME.FBP))
{
m_temp_z_full_copy = true;
}
@@ -6667,7 +6717,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.rt = rt ? rt->m_texture : nullptr;
m_conf.ds = ds ? (m_using_temp_z ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr;
pxAssert(!ds || !rt || (ds->m_texture->GetSize().x == rt->m_texture->GetSize().x && ds->m_texture->GetSize().y == rt->m_texture->GetSize().y));
pxAssert(!ds || !rt || (m_conf.ds->GetSize().x == m_conf.rt->GetSize().x && m_conf.ds->GetSize().y == m_conf.rt->GetSize().y));
// Z setup has to come before channel shuffle
EmulateZbuffer(ds);
@@ -8511,9 +8561,6 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual()
int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps)
{
if (GSConfig.UserHacks_NativeScaling == GSNativeScaling::Off)
return 0;
const GSVector2i draw_size = GSVector2i(m_vt.m_max.p.x - m_vt.m_min.p.x, m_vt.m_max.p.y - m_vt.m_min.p.y);
const GSVector2i tex_size = GSVector2i(m_vt.m_max.t.x - m_vt.m_min.t.x, m_vt.m_max.t.y - m_vt.m_min.t.y);

View File

@@ -1645,7 +1645,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM);
continue;
}
else if (!possible_shuffle && GSLocalMemory::m_psm[psm].trbpp == 8 && TEX0.TBW == 1)
else if (!possible_shuffle && GSLocalMemory::m_psm[psm].bpp <= 8 && TEX0.TBW == 1)
{
DevCon.Warning("Too small for relocation, skipping");
continue;
@@ -1739,7 +1739,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32))
continue;
if (!t->Inside(bp, bw, psm, block_boundary_rect))
// Be careful of shuffles where it can shuffle the width of the target, even though it may not have all been drawn to.
if (!possible_shuffle && !t->Inside(bp, bw, psm, block_boundary_rect))
continue;
x_offset = rect.x;
@@ -1924,7 +1925,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
}
else
{
if (!possible_shuffle && TEX0.PSM == PSMT8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp != 32)
if (!possible_shuffle && TEX0.PSM == PSMT8 && (GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp != 32 || !(t->m_valid_alpha_high && t->m_valid_alpha_low && t->m_valid_rgb)))
{
continue;
}
@@ -2094,7 +2095,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{
bool can_use = true;
if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))
if (dst && ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw) && dst->m_TEX0.TBP0 <= bp))
{
DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0);
i++;
@@ -2121,7 +2122,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{
// When returning to being matched with the Z buffer in width, we need to make sure the RGB is up to date as it could get used later (Hitman Contracts).
auto& rev_list = m_dst[1 - type];
Target* dst_match = nullptr;
for (auto j = rev_list.begin(); j != rev_list.end(); ++j)
{
Target* ds = *j;
@@ -2155,15 +2155,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
{
if (used)
list.MoveFront(i.Index());
dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16);
/*if (FindOverlappingTarget(dst))
continue;
else*/
break;
}
else if(!(src && src->m_from_target == t))
{
@@ -2178,18 +2173,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
// Probably pointing to half way through the target
else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets)
{
// Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z
/*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset)
{
continue;
}*/
const u32 widthpage_offset = (std::abs(static_cast<int>(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U);
/*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW &&
((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) ||
((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) ||
min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) &&
(static_cast<u32>(min_rect.width()) <= (widthpage_offset * 64))));*/
const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= static_cast<int>((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0);
const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)));
const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && draw_rect.w <= GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y));
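widthpage_offset above is the page column the draw starts in within the target: the block offset divided by 32 blocks per page, modulo the target width in pages. A short standalone sketch, with a hypothetical WidthPageOffset helper and page columns assumed 64 pixels wide as for the 32-bit formats:

#include <algorithm>
#include <cstdlib>
#include <cstdint>

using u32 = std::uint32_t;

// Block offset -> pages (>> 5, i.e. / 32), then modulo the target width in pages (TBW).
static u32 WidthPageOffset(u32 bp, u32 target_bp0, u32 tbw)
{
    const u32 page_offset = static_cast<u32>(std::abs(static_cast<int>(bp - target_bp0))) >> 5;
    return page_offset % std::max<u32>(tbw, 1);
}

int main()
{
    // A draw starting 3 pages into an 8-page-wide target lands in page column 3,
    // leaving (8 - 3) * 64 = 320 pixels of width before it would wrap.
    const u32 column = WidthPageOffset(3 * 32, 0, 8);
    const int usable_width = (8 - static_cast<int>(column)) * 64;
    return (column == 3 && usable_width == 320) ? 0 : 1;
}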
@@ -2201,7 +2185,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM];
// I know what you're thinking, and I hate the guy who wrote it too (me). Project Snowblind, Tomb Raider etc decide to offset where they're drawing using a channel shuffle, and this gets messy, so best just to kill the old target.
if (is_shuffle && src->m_TEX0.PSM == PSMT8 && GSRendererHW::GetInstance()->m_context->FRAME.FBW == 1 && t->m_last_draw != (GSState::s_n - 1) && src && src->m_from_target && (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0 || (((src->m_TEX0.TBP0 - src->m_from_target->m_TEX0.TBP0) >> 5) % std::max(src->m_from_target->m_TEX0.TBW, 1U) == 0)) && widthpage_offset && src->m_from_target != t)
if (is_shuffle && src && src->m_TEX0.PSM == PSMT8 && GSRendererHW::GetInstance()->m_context->FRAME.FBW == 1 && t->m_last_draw != (GSState::s_n - 1) && src->m_from_target && (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0 || (((src->m_TEX0.TBP0 - src->m_from_target->m_TEX0.TBP0) >> 5) % std::max(src->m_from_target->m_TEX0.TBW, 1U) == 0)) && widthpage_offset && src->m_from_target != t)
{
GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s offset overwrite shuffle", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM));
InvalidateSourcesFromTarget(t);
@@ -2246,7 +2230,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
//Continue just in case there's a newer target
if (used)
list.MoveFront(i.Index());
break;
if (t->m_TEX0.TBP0 <= bp || GSLocalMemory::GetStartBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, min_rect) >= bp)
break;
else
continue;
}
}
}
@@ -2827,7 +2814,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
}
}
}
if (dst)
{
dst->m_used |= used;
@@ -3112,7 +3099,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
auto j = i;
Target* t = *j;
if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) &&
if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && dst->m_TEX0.TBW == t->m_TEX0.TBW && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) &&
static_cast<int>(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast<int>(dst->m_TEX0.TBW - t->m_TEX0.TBW)))
{
const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW);
@@ -3223,6 +3210,91 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons
}
}
}
else
{
for (int type = 0; type < 2; type++)
{
auto& list = m_dst[type];
for (auto i = list.begin(); i != list.end();)
{
auto j = i;
Target* t = *j;
if (t != dst && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && GSUtil::HasSharedBits(dst->m_TEX0.PSM, t->m_TEX0.PSM))
{
if (dst->m_TEX0.TBP0 > t->m_TEX0.TBP0 && (((dst->m_TEX0.TBP0 - t->m_TEX0.TBP0) >> 5) % std::max(t->m_TEX0.TBW, 1U)) == 0)
{
int height_adjust = (((dst->m_TEX0.TBP0 - t->m_TEX0.TBP0) >> 5) / std::max(t->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y;
t->m_valid.w = std::min(height_adjust, t->m_valid.w);
t->ResizeValidity(t->m_valid);
}
else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % std::max(t->m_TEX0.TBW, 1U)) == 0)
{
if (GSUtil::GetChannelMask(dst->m_TEX0.PSM) == 0x7 && (t->m_valid_alpha_high || t->m_valid_alpha_low))
{
t->m_valid_rgb = false;
i++;
continue;
}
int height_adjust = ((((dst->m_end_block + 1) - t->m_TEX0.TBP0) >> 5) / std::max(t->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y;
if (height_adjust < t->m_unscaled_size.y)
{
t->m_TEX0.TBP0 = dst->m_end_block + 1;
t->m_valid.w -= height_adjust;
t->ResizeValidity(t->m_valid);
GSTexture* tex = (type == RenderTarget) ?
g_gs_device->CreateRenderTarget(t->m_texture->GetWidth(),
t->m_texture->GetHeight(), GSTexture::Format::Color, true) :
g_gs_device->CreateDepthStencil(t->m_texture->GetWidth(),
t->m_texture->GetHeight(), GSTexture::Format::DepthStencil, true);
if (tex)
{
g_gs_device->CopyRect(t->m_texture, tex, GSVector4i(0, height_adjust * t->m_scale, t->m_texture->GetWidth(), t->m_texture->GetHeight()), 0, 0);
if (src && src->m_target && src->m_from_target == t)
{
src->m_from_target = t;
src->m_texture = t->m_texture;
src->m_target_direct = false;
src->m_shared_texture = false;
}
else
{
g_gs_device->Recycle(t->m_texture);
}
t->m_texture = tex;
}
}
else
{
if (src && src->m_target && src->m_from_target == t)
{
src->m_from_target = t;
src->m_texture = t->m_texture;
src->m_target_direct = false;
src->m_shared_texture = false;
t->m_texture = nullptr;
i = list.erase(j);
delete t;
}
else
{
InvalidateSourcesFromTarget(t);
i = list.erase(j);
delete t;
}
}
}
}
i++;
}
}
}
return hw_clear.value_or(false);
}
@@ -3236,6 +3308,7 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con
// Didn't find a target, check if the frame was uploaded.
bool can_create = is_feedback;
GSVector2i new_size = size;
if (!is_feedback && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0)
{
@@ -3274,8 +3347,24 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con
{
iter = std::vector<GSState::GSUploadQueue>::reverse_iterator(GSRendererHW::GetInstance()->m_draw_transfers.erase(iter.base() - 1));
}
else
++iter;
// Double buffers, usually FMV's, if checking for the upper buffer, creating another target could mess things up.
else if (GSLocalMemory::GetStartBlockAddress(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect) <= TEX0.TBP0 && transfer_end >= rect_end && iter->rect.width() == size.x)
{
GSTextureCache::Target* tgt = g_texture_cache->GetExactTarget(iter->blit.DBP, iter->blit.DBW, GSTextureCache::RenderTarget, iter->blit.DBP + 1);
if (tgt) // Make this target bigger.
{
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(iter->blit.DPSM);
tgt->UpdateValidity(iter->rect, true);
new_size.y = iter->rect.w;
tgt->ResizeTexture(new_size.x, new_size.y);
AddDirtyRectTarget(tgt, iter->rect, iter->blit.DPSM, iter->blit.DBW, mask, false);
tgt->Update();
return tgt;
}
}
// In theory it might not be a full rect, but it should be enough to display *something*.
// It's also possible we haven't saved enough of the transfers to fill the rect if the game draws the picture in lots of small transfers.
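The double-buffer branch above only grows the existing target when an earlier EE upload already covers the requested display buffer. A rough standalone sketch of that containment test, with a hypothetical UploadCoversDisplay helper and plain block addresses and widths:

#include <cstdint>

using u32 = std::uint32_t;

// An upload can stand in for the requested display buffer if it starts at or before the
// display base pointer, ends at or after the display rect, and matches its width.
static bool UploadCoversDisplay(u32 transfer_start_bp, u32 transfer_end_bp, u32 display_bp, u32 display_end_bp, int transfer_width, int display_width)
{
    return transfer_start_bp <= display_bp && transfer_end_bp >= display_end_bp && transfer_width == display_width;
}

int main()
{
    // FMV-style double buffer: one transfer spans both halves, so the existing target is
    // resized and dirtied rather than a second, overlapping target being created.
    return UploadCoversDisplay(0x0, 0x2FFF, 0x1800, 0x2FFF, 640, 640) ? 0 : 1;
}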
@@ -3287,7 +3376,7 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con
}
}
return can_create ? CreateTarget(TEX0, size, size, scale, RenderTarget, true, 0, true) : nullptr;
return can_create ? CreateTarget(TEX0, new_size, new_size, scale, RenderTarget, true, 0, true) : nullptr;
}
void GSTextureCache::Target::ScaleRTAlpha()
@@ -3638,8 +3727,27 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr
for (auto i = list.begin(); i != list.end();)
{
Target* const t = *i;
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp))
{
++i;
continue;
}
// If not fully contained, just dirty the area.
if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp))
{
if (write_bw == t->m_TEX0.TBW && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[write_psm].bpp)
{
const u32 page_offset = ((end_bp - start_bp) >> 5);
const u32 end_width = write_bw * 64;
const u32 end_height = ((page_offset / std::max(write_bw, 1U)) * GSLocalMemory::m_psm[write_psm].pgs.y) + GSLocalMemory::m_psm[write_psm].pgs.y;
const GSVector4i r = GSVector4i(0, 0, end_width, end_height);
const GSVector4i invalidate_r = TranslateAlignedRectByPage(t, start_bp, write_psm, write_bw, r, false).rintersect(t->m_valid); // it is invalidation but we need a real rect.
RGBAMask mask;
mask._u32 = GSUtil::GetChannelMask(write_psm);
AddDirtyRectTarget(t, invalidate_r, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false);
}
++i;
continue;
}
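The dirty height above counts the whole page rows the write spans and then adds one more page row so the final, possibly partial, row is included. A rough standalone sketch of that calculation, with a hypothetical DirtyHeightFromBlocks helper:

#include <algorithm>
#include <cstdint>

using u32 = std::uint32_t;

// Dirty height = whole page rows covered by the write, plus one extra page row so the
// final, possibly partial, row is included. page_height is e.g. 32 for PSMCT32.
static u32 DirtyHeightFromBlocks(u32 start_bp, u32 end_bp, u32 write_bw, u32 page_height)
{
    const u32 page_offset = (end_bp - start_bp) >> 5; // blocks -> pages
    return (page_offset / std::max<u32>(write_bw, 1)) * page_height + page_height;
}

int main()
{
    // A write covering 96 blocks (3 pages) of a 2-page-wide buffer with 32px-high pages
    // spans one full page row plus part of a second: dirty height = 64 pixels.
    return (DirtyHeightFromBlocks(0, 96, 2, 32) == 64) ? 0 : 1;
}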
@@ -7016,11 +7124,11 @@ void GSTextureCache::Target::Update(bool cannot_scale)
{
if (g_texture_cache->GetTemporaryZInfo().ZBP == m_TEX0.TBP0)
{
GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
const GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo();
if (m_TEX0.TBP0 == z_address_info.ZBP)
{
//GL_CACHE("RT in RT Updating Z copy on draw %d z_offset %d", s_n, z_address_info.offset);
GSVector4i dRect = GSVector4i(total_rect.x * m_scale, (z_address_info.offset + total_rect.y) * m_scale, (total_rect.z + (1.0f / m_scale)) * m_scale, (z_address_info.offset + total_rect.w + (1.0f / m_scale)) * m_scale);
const GSVector4i dRect = GSVector4i(total_rect.x * m_scale, (z_address_info.offset + total_rect.y) * m_scale, (total_rect.z + (1.0f / m_scale)) * m_scale, (z_address_info.offset + total_rect.w + (1.0f / m_scale)) * m_scale);
g_gs_device->StretchRect(m_texture, GSVector4(total_rect.x / static_cast<float>(m_unscaled_size.x), total_rect.y / static_cast<float>(m_unscaled_size.y), (total_rect.z + (1.0f / m_scale)) / static_cast<float>(m_unscaled_size.x), (total_rect.w + (1.0f / m_scale)) / static_cast<float>(m_unscaled_size.y)), g_texture_cache->GetTemporaryZ(), GSVector4(dRect), ShaderConvert::DEPTH_COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
}
@@ -7120,11 +7228,6 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect)
m_valid = m_valid.rintersect(rect);
m_drawn_since_read = m_drawn_since_read.rintersect(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
}
// Else No valid size, so need to resize down.
@@ -7139,32 +7242,25 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res
m_valid = rect;
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
}
else if (can_resize)
{
m_valid = m_valid.runion(rect);
m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid);
const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5);
if (offset)
m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset);
}
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
}
bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old, bool require_new_rect, GSVector4i new_rect, bool keep_old)
{
if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height && !require_new_rect)
return true;
const GSVector2i size = m_texture->GetSize();
const GSVector2i new_unscaled_size = GSVector2i(new_unscaled_width, new_unscaled_height);
const GSVector2i new_size = ScaleRenderTargetSize(new_unscaled_size, m_scale);
if (size.x == new_size.x && size.y == new_size.y && !require_new_rect)
return true;
const bool clear = (new_size.x > size.x || new_size.y > size.y);
GSTexture* tex = m_texture->IsDepthStencil() ?
@@ -7219,7 +7315,7 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca
if (!keep_old)
{
g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage();
if (recycle_old)
g_gs_device->Recycle(m_texture);

View File

@@ -563,7 +563,7 @@ public:
GSTexture* GetTemporaryZ();
TempZAddress GetTemporaryZInfo();
void SetTemporaryZInfo(u32 address, u32 offset);
/// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is
/// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is.
void InvalidateTemporaryZ();
/// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred.

View File

@@ -1175,7 +1175,7 @@ struct PSMain
{
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.br = C.rb;
C.ag = C.ga;
}
else if(PS_PROCESS_BA & SHUFFLE_READ)
@@ -1190,7 +1190,7 @@ struct PSMain
}
}
}
ps_dither(C, alpha_blend.a);
// Color clamp/wrap needs to be done after sw blending and dithering

View File

@@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 61;
static constexpr u32 SHADER_CACHE_VERSION = 62;