From a6d5598c08bb8ceabbea18719c8b239b079de982 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 4 Mar 2025 19:10:41 +0000 Subject: [PATCH] GS/HW: More RT in RT regression fixes --- bin/resources/shaders/dx11/tfx.fx | 2 +- bin/resources/shaders/opengl/tfx_fs.glsl | 2 +- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GS.cpp | 9 ++ pcsx2/GS/GSState.cpp | 45 +++++- pcsx2/GS/GSState.h | 2 + pcsx2/GS/Renderers/Common/GSRenderer.cpp | 9 -- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 181 +++++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 192 +++++++++++++++++------ pcsx2/GS/Renderers/HW/GSTextureCache.h | 2 +- pcsx2/GS/Renderers/Metal/tfx.metal | 4 +- pcsx2/ShaderCacheVersion.h | 2 +- 12 files changed, 316 insertions(+), 138 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 33ca3634be..a0de0b4246 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1127,7 +1127,7 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.br = C.rb; + C.br = C.rb; C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index b19dee992b..8dc7f852ff 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1095,7 +1095,7 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.br = C.rb; + C.br = C.rb; C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 757313ff90..3061226288 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1343,8 +1343,6 @@ void main() #endif #endif - - // Special case for 32bit input and 16bit output, shuffle used by The Godfather #if PS_SHUFFLE_SAME #if (PS_PROCESS_BA & SHUFFLE_READ) @@ -1362,7 +1360,7 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.br = C.rb; + C.br = C.rb; C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index e17bad25ec..43f88e826d 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -435,6 +435,15 @@ void GSgifTransfer3(u8* mem, u32 size) void GSvsync(u32 field, bool registers_written) { + // Update this here because we need to check if the pending draw affects the current frame, so our regs need to be updated. + g_gs_renderer->PCRTCDisplays.SetVideoMode(g_gs_renderer->GetVideoMode()); + g_gs_renderer->PCRTCDisplays.EnableDisplays(g_gs_renderer->m_regs->PMODE, g_gs_renderer->m_regs->SMODE2, g_gs_renderer->isReallyInterlaced()); + g_gs_renderer->PCRTCDisplays.CheckSameSource(); + g_gs_renderer->PCRTCDisplays.SetRects(0, g_gs_renderer->m_regs->DISP[0].DISPLAY, g_gs_renderer->m_regs->DISP[0].DISPFB); + g_gs_renderer->PCRTCDisplays.SetRects(1, g_gs_renderer->m_regs->DISP[1].DISPLAY, g_gs_renderer->m_regs->DISP[1].DISPFB); + g_gs_renderer->PCRTCDisplays.CalculateDisplayOffset(g_gs_renderer->m_scanmask_used); + g_gs_renderer->PCRTCDisplays.CalculateFramebufferOffset(g_gs_renderer->m_scanmask_used); + // Do not move the flush into the VSync() method. It's here because EE transfers // get cleared in HW VSync, and may be needed for a buffered draw (FFX FMVs). g_gs_renderer->Flush(GSState::VSYNC); diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index eac0daf7eb..cd208b2c71 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1477,6 +1477,35 @@ void GSState::Flush(GSFlushReason reason) if (m_index.tail > 0) { + // Unless Vsync really needs the pending draw, don't do it when VSync happens as it can really screw up our heuristics when looking ahead. + if (reason == VSYNC) + { + GSDrawingContext* draw_ctx = &m_prev_env.CTXT[m_prev_env.PRIM.CTXT]; + const u32 start_bp = GSLocalMemory::GetStartBlockAddress(draw_ctx->FRAME.Block(), draw_ctx->FRAME.FBW, draw_ctx->FRAME.PSM, temp_draw_rect); + const u32 end_bp = GSLocalMemory::GetEndBlockAddress(draw_ctx->FRAME.Block(), draw_ctx->FRAME.FBW, draw_ctx->FRAME.PSM, temp_draw_rect); + bool needs_flush[2] = {PCRTCDisplays.PCRTCDisplays[0].enabled, PCRTCDisplays.PCRTCDisplays[1].enabled}; + + if (PCRTCDisplays.PCRTCDisplays[1].enabled) + { + const u32 out_start_bp = GSLocalMemory::GetStartBlockAddress(PCRTCDisplays.PCRTCDisplays[1].Block(), PCRTCDisplays.PCRTCDisplays[1].FBW, PCRTCDisplays.PCRTCDisplays[1].PSM, PCRTCDisplays.PCRTCDisplays[1].framebufferRect); + const u32 out_end_bp = GSLocalMemory::GetEndBlockAddress(PCRTCDisplays.PCRTCDisplays[1].Block(), PCRTCDisplays.PCRTCDisplays[1].FBW, PCRTCDisplays.PCRTCDisplays[1].PSM, PCRTCDisplays.PCRTCDisplays[1].framebufferRect); + + if (out_start_bp > end_bp || out_end_bp < start_bp) + needs_flush[1] = false; + } + + if (PCRTCDisplays.PCRTCDisplays[0].enabled) + { + const u32 out_start_bp = GSLocalMemory::GetStartBlockAddress(PCRTCDisplays.PCRTCDisplays[0].Block(), PCRTCDisplays.PCRTCDisplays[0].FBW, PCRTCDisplays.PCRTCDisplays[0].PSM, PCRTCDisplays.PCRTCDisplays[0].framebufferRect); + const u32 out_end_bp = GSLocalMemory::GetEndBlockAddress(PCRTCDisplays.PCRTCDisplays[0].Block(), PCRTCDisplays.PCRTCDisplays[0].FBW, PCRTCDisplays.PCRTCDisplays[0].PSM, PCRTCDisplays.PCRTCDisplays[0].framebufferRect); + + if (out_start_bp > end_bp || out_end_bp < start_bp) + needs_flush[0] = false; + } + + if (!needs_flush[0] && !needs_flush[1]) + return; + } m_state_flush_reason = reason; // Used to prompt the current draw that it's modifying its own CLUT. @@ -1942,10 +1971,10 @@ void GSState::Write(const u8* mem, int len) m_draw_transfers.push_back(new_transfer); } - GL_CACHE("Write! %u ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)", s_transfer_n, + GL_CACHE("Write! %u ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d) draw %d", s_transfer_n, blit.DBP, blit.DBW, psm_str(blit.DPSM), m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, - m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h); + m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h, s_n); if (len >= m_tr.total) { @@ -3093,7 +3122,7 @@ void GSState::CalculatePrimitiveCoversWithoutGaps() } else if (m_vt.m_primclass == GS_TRIANGLE_CLASS) { - m_primitive_covers_without_gaps = ((m_index.tail % 6) == 0 && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound; + m_primitive_covers_without_gaps = ((m_index.tail == 6 || ((m_index.tail % 6) == 0 && m_primitive_covers_without_gaps == FullCover)) && TrianglesAreQuads()) ? m_primitive_covers_without_gaps : GapsFound; return; } else if (m_vt.m_primclass != GS_SPRITE_CLASS) @@ -3123,7 +3152,7 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) { // Pretty confident here... GSVertex* buffer = &m_vertex.buff[0]; - const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) <= 256; // Lequal to 16 pixels apart. if (const_spacing) return false; @@ -4728,10 +4757,16 @@ GSVector2i GSState::GSPCRTCRegs::GetFramebufferSize(int display) void GSState::GSPCRTCRegs::SetRects(int display, GSRegDISPLAY displayReg, GSRegDISPFB framebufferReg) { // Save framebuffer information first, while we're here. + PCRTCDisplays[display].prevFramebufferReg.FBP = PCRTCDisplays[display].FBP; + PCRTCDisplays[display].prevFramebufferReg.FBW = PCRTCDisplays[display].FBW; + PCRTCDisplays[display].prevFramebufferReg.PSM = PCRTCDisplays[display].PSM; + PCRTCDisplays[display].prevFramebufferReg.DBX = PCRTCDisplays[display].DBX; + PCRTCDisplays[display].prevFramebufferReg.DBY = PCRTCDisplays[display].DBY; PCRTCDisplays[display].FBP = framebufferReg.FBP; PCRTCDisplays[display].FBW = framebufferReg.FBW; PCRTCDisplays[display].PSM = framebufferReg.PSM; - PCRTCDisplays[display].prevFramebufferReg = framebufferReg; + PCRTCDisplays[display].DBX = framebufferReg.DBX; + PCRTCDisplays[display].DBY = framebufferReg.DBY; // Probably not really enabled but will cause a mess. // Q-Ball Billiards enables both circuits but doesn't set one of them up. if (PCRTCDisplays[display].FBW == 0 && displayReg.DW == 0 && displayReg.DH == 0 && displayReg.MAGH == 0) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 638b87090f..4d3c696b08 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -323,6 +323,8 @@ public: int FBP; int FBW; int PSM; + int DBY; + int DBX; GSRegDISPFB prevFramebufferReg; GSVector2i prevDisplayOffset; GSVector2i displayOffset; diff --git a/pcsx2/GS/Renderers/Common/GSRenderer.cpp b/pcsx2/GS/Renderers/Common/GSRenderer.cpp index fc068c13a4..4fff716634 100644 --- a/pcsx2/GS/Renderers/Common/GSRenderer.cpp +++ b/pcsx2/GS/Renderers/Common/GSRenderer.cpp @@ -87,10 +87,6 @@ bool GSRenderer::Merge(int field) int y_offset[3] = { 0, 0, 0 }; const bool feedback_merge = m_regs->EXTWRITE.WRITE == 1; - PCRTCDisplays.SetVideoMode(GetVideoMode()); - PCRTCDisplays.EnableDisplays(m_regs->PMODE, m_regs->SMODE2, isReallyInterlaced()); - PCRTCDisplays.CheckSameSource(); - if (!PCRTCDisplays.PCRTCDisplays[0].enabled && !PCRTCDisplays.PCRTCDisplays[1].enabled) { m_real_size = GSVector2i(0, 0); @@ -101,11 +97,6 @@ bool GSRenderer::Merge(int field) const bool game_deinterlacing = (m_regs->DISP[0].DISPFB.DBY != PCRTCDisplays.PCRTCDisplays[0].prevFramebufferReg.DBY) != (m_regs->DISP[1].DISPFB.DBY != PCRTCDisplays.PCRTCDisplays[1].prevFramebufferReg.DBY); - PCRTCDisplays.SetRects(0, m_regs->DISP[0].DISPLAY, m_regs->DISP[0].DISPFB); - PCRTCDisplays.SetRects(1, m_regs->DISP[1].DISPLAY, m_regs->DISP[1].DISPFB); - PCRTCDisplays.CalculateDisplayOffset(m_scanmask_used); - PCRTCDisplays.CalculateFramebufferOffset(m_scanmask_used); - // Only need to check the right/bottom on software renderer, hardware always gets the full texture then cuts a bit out later. if (PCRTCDisplays.FrameRectMatch() && !PCRTCDisplays.FrameWrap() && !feedback_merge) { diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index e50a664e46..48aea90f5e 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -587,6 +587,12 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, v[i + reversed_pos].XYZ.X -= 128u; v[i + 1 - reversed_pos].XYZ.X -= 128u; } + // Needed for when there's no barriers. + if (v[i + reversed_U].U & 128) + { + v[i + reversed_U].U -= 128u; + v[i + 1 - reversed_U].U -= 128u; + } } if (half_bottom_vert) @@ -649,6 +655,14 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, else v[i + 1 - reversed_S].ST.S += offset_8pix; } + else + { + if (static_cast(v[i + reversed_S].ST.S * tw) & 8) + { + v[i + reversed_S].ST.S -= offset_8pix; + v[i + 1 - reversed_S].ST.S -= offset_8pix; + } + } if (half_bottom_vert) { @@ -2481,7 +2495,7 @@ void GSRendererHW::Draw() } // We trigger the sw prim render here super early, to avoid creating superfluous render targets. - if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex) && SwPrimRender(*this, true, true)) + if (CanUseSwPrimRender(no_rt, no_ds, draw_sprite_tex && m_process_texture) && SwPrimRender(*this, true, true)) { GL_CACHE("Possible texture decompression, drawn with SwPrimRender() (BP %x BW %u TBP0 %x TBW %u)", m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBMSK, m_cached_ctx.TEX0.TBP0, m_cached_ctx.TEX0.TBW); @@ -2643,27 +2657,34 @@ void GSRendererHW::Draw() // Try to fix large single-page-wide draws. bool height_invalid = m_r.w >= 1024; + const GSVector2i& pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; if (height_invalid && m_cached_ctx.FRAME.FBW <= 1 && TryToResolveSinglePageFramebuffer(m_cached_ctx.FRAME, true)) { - const GSVector2i& pgs = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].pgs; ReplaceVerticesWithSprite( GetDrawRectForPages(m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, (m_r.w + (pgs.y - 1)) / pgs.y), GSVector2i(1, 1)); height_invalid = false; } - const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color); - const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth); + // Be careful of being 1 pixel from filled. + const bool page_aligned = (m_r.w % pgs.y) == (pgs.y - 1) || (m_r.w % pgs.y) == 0; + const bool is_zero_color_clear = (GetConstantDirectWriteMemClearColor() == 0 && !preserve_rt_color && page_aligned); + const bool is_zero_depth_clear = (GetConstantDirectWriteMemClearDepth() == 0 && !preserve_depth && page_aligned); // If it's an invalid-sized draw, do the mem clear on the CPU, we don't want to create huge targets. // If clearing to zero, don't bother creating the target. Games tend to clear more than they use, wasting VRAM/bandwidth. if (is_zero_color_clear || is_zero_depth_clear || height_invalid) { - const u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress( + u32 rt_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); const u32 ds_end_bp = GSLocalMemory::GetUnwrappedEndBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r); + + // This can get missed by the double half clear, but we can make sure we nuke everything inside if the Z is butted up against the FRAME. + if (!no_ds && (rt_end_bp + 1) == m_cached_ctx.ZBUF.Block() && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp) + rt_end_bp = ds_end_bp; + // If this is a partial clear of a larger buffer, we can't invalidate the target, since we'll be losing data // which only existed on the GPU. Assume a BW change is a new target, though. Test case: Persona 3 shadows. GSTextureCache::Target* tgt; @@ -2893,7 +2914,10 @@ void GSRendererHW::Draw() // TODO: Be able to send an alpha of 1.0 (blended with vertex alpha maybe?) so we can avoid sending the texture, since we don't always need it. // Example games: Evolution Snowboarding, Final Fantasy Dirge of Cerberus, Red Dead Revolver, Stuntman, Tony Hawk's Underground 2, Ultimate Spider-Man. if (!req_color && !alpha_used) + { m_process_texture = false; + possible_shuffle = false; + } else { src = tex_psm.depth ? g_texture_cache->LookupDepthSource(true, TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, possible_shuffle, m_vt.IsLinear(), m_cached_ctx.FRAME.Block(), req_color, req_alpha) : @@ -2971,31 +2995,36 @@ void GSRendererHW::Draw() const GSVector4i unclamped_draw_rect = m_r; float target_scale = GetTextureScaleFactor(); + bool scaled_copy = false; int scale_draw = IsScalingDraw(src, m_primitive_covers_without_gaps != NoGapsType::GapsFound); - if (target_scale > 1.0f && scale_draw > 0) + if (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off) { - // 1 == Downscale, so we need to reduce the size of the target also. - // 2 == Upscale, so likely putting it over the top of the render target. - if (scale_draw == 1) + if (target_scale > 1.0f && scale_draw > 0) { - target_scale = 1.0f; - m_downscale_source = src->m_from_target->GetScale() > 1.0f; + // 1 == Downscale, so we need to reduce the size of the target also. + // 2 == Upscale, so likely putting it over the top of the render target. + if (scale_draw == 1) + { + target_scale = 1.0f; + m_downscale_source = src->m_from_target->GetScale() > 1.0f; + } + else + m_downscale_source = GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa. } else - m_downscale_source = GSConfig.UserHacks_NativeScaling != GSNativeScaling::Aggressive ? false : src->m_from_target->GetScale() > 1.0f; // Bad for GTA + Full Spectrum Warrior, good for Sacred Blaze + Parappa. - } - else - { - // if it's directly copying keep the scale - Ratchet and clank hits this, stops edge garbage happening. - // Keep it to small targets of 256 or lower. - if (scale_draw == -1 && src && src->m_from_target && src->m_from_target->m_downscaled && static_cast(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) && - (GSVector4i(m_vt.m_min.p).xyxy() == GSVector4i(m_vt.m_min.t).xyxy()).alltrue() && (GSVector4i(m_vt.m_max.p).xyxy() == GSVector4i(m_vt.m_max.t).xyxy()).alltrue()) { - target_scale = src->m_from_target->GetScale(); - scale_draw = 1; - } + // if it's directly copying keep the scale - Ratchet and clank hits this, stops edge garbage happening. + // Keep it to small targets of 256 or lower. + if (scale_draw == -1 && src && src->m_from_target && src->m_from_target->m_downscaled && static_cast(m_cached_ctx.FRAME.FBW * 64) <= (PCRTCDisplays.GetResolution().x >> 1) && + (GSVector4i(m_vt.m_min.p).xyxy() == GSVector4i(m_vt.m_min.t).xyxy()).alltrue() && (GSVector4i(m_vt.m_max.p).xyxy() == GSVector4i(m_vt.m_max.t).xyxy()).alltrue()) + { + target_scale = src->m_from_target->GetScale(); + scale_draw = 1; + scaled_copy = true; + } - m_downscale_source = false; + m_downscale_source = false; + } } if (IsPossibleChannelShuffle() && src && src->m_from_target && src->m_from_target->GetScale() != target_scale) @@ -3101,7 +3130,7 @@ void GSRendererHW::Draw() if (!no_rt) { - possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + possible_shuffle |= draw_sprite_tex && m_process_texture && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && (GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 || draw_uses_target) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); @@ -3157,7 +3186,7 @@ void GSRendererHW::Draw() // Preserve downscaled target when copying directly from a downscaled target, or it's a normal draw using a downscaled target. Clears that are drawing to the target can also preserve size. // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. - const bool preserve_downscale_draw = std::abs(scale_draw) == 1 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); + const bool preserve_downscale_draw = (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && (std::abs(scale_draw) == 1 || (scale_draw == 0 && src && src->m_from_target && src->m_from_target->m_downscaled))) || is_possible_mem_clear == ClearType::ClearWithDraw; rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), @@ -3198,7 +3227,7 @@ void GSRendererHW::Draw() return; } - rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, + rt = g_texture_cache->CreateTarget(FRAME_TEX0, t_size, GetValidSize(src), (GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && scale_draw < 0 && is_possible_mem_clear != ClearType::NormalClear) ? src->m_from_target->GetScale() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_color || possible_shuffle, lookup_rect, src); if (!rt) [[unlikely]] @@ -3245,7 +3274,7 @@ void GSRendererHW::Draw() if (rt->m_dirty.size()) { - for (int i = 0; i < rt->m_dirty.size(); i++) + for (int i = 0; i < static_cast(rt->m_dirty.size()); i++) { rt->m_dirty[i].r.y += new_offset; rt->m_dirty[i].r.w += new_offset; @@ -3266,34 +3295,6 @@ void GSRendererHW::Draw() if (vertical_offset || horizontal_offset) { - // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? - if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) - { - const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; - if (g_texture_cache->GetTemporaryZ() != nullptr) - { - GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo(); - - if (ds->m_TEX0.TBP0 != z_address_info.ZBP || z_address_info.offset != (vertical_offset - z_vertical_offset)) - g_texture_cache->InvalidateTemporaryZ(); - } - - if (g_texture_cache->GetTemporaryZ() == nullptr) - { - m_temp_z_full_copy = false; - u32 vertical_size = std::max(rt->m_unscaled_size.y, ds->m_unscaled_size.y); - GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); - GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, (vertical_offset + ds->m_unscaled_size.y - z_vertical_offset) * ds->m_scale); - const int new_height = std::max(static_cast(vertical_size * ds->m_scale), dRect.w); - GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); - g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, (ds->m_unscaled_size.y - z_vertical_offset) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); - g_perfmon.Put(GSPerfMon::TextureCopies, 1); - g_texture_cache->SetTemporaryZ(tex); - g_texture_cache->SetTemporaryZInfo(ds->m_TEX0.TBP0, vertical_offset - z_vertical_offset); - } - m_using_temp_z = true; - } - GSVertex* v = &m_vertex.buff[0]; for (u32 i = 0; i < m_vertex.tail; i++) @@ -3336,6 +3337,36 @@ void GSRendererHW::Draw() t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; + + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? + if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + const int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + if (g_texture_cache->GetTemporaryZ() != nullptr) + { + GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo(); + + if (ds->m_TEX0.TBP0 != z_address_info.ZBP || z_address_info.offset != static_cast(vertical_offset - z_vertical_offset)) + g_texture_cache->InvalidateTemporaryZ(); + } + + if (g_texture_cache->GetTemporaryZ() == nullptr) + { + m_temp_z_full_copy = false; + u32 vertical_size = std::max(rt->m_unscaled_size.y, ds->m_unscaled_size.y); + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, vertical_offset); + GSVector4i dRect = GSVector4i(0, vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, (vertical_offset + ds->m_unscaled_size.y - z_vertical_offset) * ds->m_scale); + const int new_height = std::max(static_cast(vertical_size * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, (ds->m_unscaled_size.y - z_vertical_offset) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + g_texture_cache->SetTemporaryZ(tex); + g_texture_cache->SetTemporaryZInfo(ds->m_TEX0.TBP0, vertical_offset - z_vertical_offset); + t_size.y = std::max(new_height, t_size.y); + } + m_using_temp_z = true; + + } } // Don't resize if the BPP don't match. if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) @@ -3373,7 +3404,7 @@ void GSRendererHW::Draw() } } } - + if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) { src->m_texture = rt->m_texture; @@ -3392,6 +3423,7 @@ void GSRendererHW::Draw() // Slightly abusing the texture resize. ds->m_scale = target_scale; ds->m_unscaled_size = unscaled_size; + ds->m_downscaled = rt->m_downscaled; } // The target might have previously been a C32 format with valid alpha. If we're switching to C24, we need to preserve it. preserve_rt_alpha |= (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == 24 && rt->HasValidAlpha()); @@ -3713,17 +3745,16 @@ void GSRendererHW::Draw() if (m_cached_ctx.FRAME.FBMSK & 0xF0000000) rt->m_valid_alpha_high = false; } - if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y) || (scale_draw == 1 && !scaled_copy)) { FRAME_TEX0.TBP0 = rt->m_TEX0.TBP0; rt->m_TEX0 = FRAME_TEX0; - } } if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) { - if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + if (ZBUF_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y) || (scale_draw == 1 && !scaled_copy)) { ZBUF_TEX0.TBP0 = ds->m_TEX0.TBP0; ds->m_TEX0 = ZBUF_TEX0; @@ -3821,8 +3852,10 @@ void GSRendererHW::Draw() // We still need to make sure the dimensions of the targets match. // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. - const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); - const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0))); + GSVector2i ds_size = m_using_temp_z ? GSVector2i(g_texture_cache->GetTemporaryZ()->GetSize() / ds->m_scale) : (ds ? ds->m_unscaled_size : GSVector2i(0,0)); + + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3915,6 +3948,23 @@ void GSRendererHW::Draw() ds->ResizeTexture(new_w, new_h); + + if (m_using_temp_z) + { + const int z_width = g_texture_cache->GetTemporaryZ()->GetWidth() / ds->m_scale; + const int z_height = g_texture_cache->GetTemporaryZ()->GetHeight() / ds->m_scale; + + if (z_width != new_w || z_height != new_h) + { + GSVector4i dRect = GSVector4i(0, 0, g_texture_cache->GetTemporaryZ()->GetWidth(), g_texture_cache->GetTemporaryZ()->GetHeight()); + + GSTexture* tex = g_gs_device->CreateDepthStencil(new_w * ds->m_scale, new_h * ds->m_scale, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, 0.0f, 1.0f, 1.0f), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + g_texture_cache->InvalidateTemporaryZ(); + g_texture_cache->SetTemporaryZ(tex); + } + } if (!m_texture_shuffle && !m_channel_shuffle) { ds->ResizeValidity(ds->GetUnscaledRect()); @@ -4111,7 +4161,7 @@ void GSRendererHW::Draw() } // Noting to do if no texture is sampled - if (PRIM->FST && draw_sprite_tex) + if (PRIM->FST && draw_sprite_tex && m_process_texture) { if ((GSConfig.UserHacks_RoundSprite > 1) || (GSConfig.UserHacks_RoundSprite == 1 && !m_vt.IsLinear())) { @@ -4184,7 +4234,7 @@ void GSRendererHW::Draw() { const int get_next_ctx = m_env.PRIM.CTXT; const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; - if ((m_state_flush_reason != CONTEXTCHANGE) || next_ctx.ZBUF.ZBP == m_context->ZBUF.ZBP && next_ctx.FRAME.FBP == m_context->FRAME.FBP) + if ((m_state_flush_reason != CONTEXTCHANGE) || (next_ctx.ZBUF.ZBP == m_context->ZBUF.ZBP && next_ctx.FRAME.FBP == m_context->FRAME.FBP)) { m_temp_z_full_copy = true; } @@ -6667,7 +6717,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.rt = rt ? rt->m_texture : nullptr; m_conf.ds = ds ? (m_using_temp_z ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; - pxAssert(!ds || !rt || (ds->m_texture->GetSize().x == rt->m_texture->GetSize().x && ds->m_texture->GetSize().y == rt->m_texture->GetSize().y)); + pxAssert(!ds || !rt || (m_conf.ds->GetSize().x == m_conf.rt->GetSize().x && m_conf.ds->GetSize().y == m_conf.rt->GetSize().y)); // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -8511,9 +8561,6 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual() int GSRendererHW::IsScalingDraw(GSTextureCache::Source* src, bool no_gaps) { - if (GSConfig.UserHacks_NativeScaling == GSNativeScaling::Off) - return 0; - const GSVector2i draw_size = GSVector2i(m_vt.m_max.p.x - m_vt.m_min.p.x, m_vt.m_max.p.y - m_vt.m_min.p.y); const GSVector2i tex_size = GSVector2i(m_vt.m_max.t.x - m_vt.m_min.t.x, m_vt.m_max.t.y - m_vt.m_min.t.y); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 93003fb16c..1c38ab1730 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1645,7 +1645,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); continue; } - else if (!possible_shuffle && GSLocalMemory::m_psm[psm].trbpp == 8 && TEX0.TBW == 1) + else if (!possible_shuffle && GSLocalMemory::m_psm[psm].bpp <= 8 && TEX0.TBW == 1) { DevCon.Warning("Too small for relocation, skipping"); continue; @@ -1739,7 +1739,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!t->HasValidBitsForFormat(psm, req_color, req_alpha, t->m_TEX0.TBW == TEX0.TBW) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; - if (!t->Inside(bp, bw, psm, block_boundary_rect)) + // Be careful of shuffles where it can shuffle the width of the target, even though it may not have all been drawn to. + if (!possible_shuffle && !t->Inside(bp, bw, psm, block_boundary_rect)) continue; x_offset = rect.x; @@ -1924,7 +1925,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } else { - if (!possible_shuffle && TEX0.PSM == PSMT8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp != 32) + if (!possible_shuffle && TEX0.PSM == PSMT8 && (GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp != 32 || !(t->m_valid_alpha_high && t->m_valid_alpha_low && t->m_valid_rgb))) { continue; } @@ -2094,7 +2095,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { bool can_use = true; - if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + if (dst && ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw) && dst->m_TEX0.TBP0 <= bp)) { DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); i++; @@ -2121,7 +2122,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { // When returning to being matched with the Z buffer in width, we need to make sure the RGB is up to date as it could get used later (Hitman Contracts). auto& rev_list = m_dst[1 - type]; - Target* dst_match = nullptr; for (auto j = rev_list.begin(); j != rev_list.end(); ++j) { Target* ds = *j; @@ -2155,15 +2155,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { if (used) list.MoveFront(i.Index()); - dst = t; dst->m_32_bits_fmt |= (psm_s.bpp != 16); - - /*if (FindOverlappingTarget(dst)) - continue; - else*/ - break; + break; } else if(!(src && src->m_from_target == t)) { @@ -2178,18 +2173,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // Probably pointing to half way through the target else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z - /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) - { - continue; - }*/ - const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - /*const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && - ((((min_rect.z + 63) >> 6) + widthpage_offset) <= TEX0.TBW) || - ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || - min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && - (static_cast(min_rect.width()) <= (widthpage_offset * 64))));*/ const bool is_aligned_ok = widthpage_offset == 0 || ((min_rect.width() <= static_cast((t->m_TEX0.TBW - widthpage_offset) * 64) && (t->m_TEX0.TBW == TEX0.TBW || TEX0.TBW == 1)) && bp >= t->m_TEX0.TBP0); const bool no_target_or_newer = (!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))); const bool width_match = (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && draw_rect.w <= GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y)); @@ -2201,7 +2185,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe const GSLocalMemory::psm_t& s_psm = GSLocalMemory::m_psm[TEX0.PSM]; // I know what you're thinking, and I hate the guy who wrote it too (me). Project Snowblind, Tomb Raider etc decide to offset where they're drawing using a channel shuffle, and this gets messy, so best just to kill the old target. - if (is_shuffle && src->m_TEX0.PSM == PSMT8 && GSRendererHW::GetInstance()->m_context->FRAME.FBW == 1 && t->m_last_draw != (GSState::s_n - 1) && src && src->m_from_target && (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0 || (((src->m_TEX0.TBP0 - src->m_from_target->m_TEX0.TBP0) >> 5) % std::max(src->m_from_target->m_TEX0.TBW, 1U) == 0)) && widthpage_offset && src->m_from_target != t) + if (is_shuffle && src && src->m_TEX0.PSM == PSMT8 && GSRendererHW::GetInstance()->m_context->FRAME.FBW == 1 && t->m_last_draw != (GSState::s_n - 1) && src->m_from_target && (src->m_from_target->m_TEX0.TBP0 == src->m_TEX0.TBP0 || (((src->m_TEX0.TBP0 - src->m_from_target->m_TEX0.TBP0) >> 5) % std::max(src->m_from_target->m_TEX0.TBW, 1U) == 0)) && widthpage_offset && src->m_from_target != t) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s offset overwrite shuffle", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -2246,7 +2230,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe //Continue just in case there's a newer target if (used) list.MoveFront(i.Index()); - break; + if (t->m_TEX0.TBP0 <= bp || GSLocalMemory::GetStartBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, min_rect) >= bp) + break; + else + continue; } } } @@ -2827,7 +2814,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } } - + if (dst) { dst->m_used |= used; @@ -3112,7 +3099,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM && dst->m_TEX0.TBW == t->m_TEX0.TBW && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && static_cast(((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) / 32) % std::max(dst->m_TEX0.TBW, 1U)) <= std::max(0, static_cast(dst->m_TEX0.TBW - t->m_TEX0.TBW))) { const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); @@ -3223,6 +3210,91 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons } } } + else + { + for (int type = 0; type < 2; type++) + { + auto& list = m_dst[type]; + for (auto i = list.begin(); i != list.end();) + { + auto j = i; + Target* t = *j; + if (t != dst && t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid) && GSUtil::HasSharedBits(dst->m_TEX0.PSM, t->m_TEX0.PSM)) + { + if (dst->m_TEX0.TBP0 > t->m_TEX0.TBP0 && (((dst->m_TEX0.TBP0 - t->m_TEX0.TBP0) >> 5) % std::max(t->m_TEX0.TBW, 1U)) == 0) + { + int height_adjust = (((dst->m_TEX0.TBP0 - t->m_TEX0.TBP0) >> 5) / std::max(t->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + + t->m_valid.w = std::min(height_adjust, t->m_valid.w); + t->ResizeValidity(t->m_valid); + } + else if (dst->m_TEX0.TBP0 < t->m_TEX0.TBP0 && (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % std::max(t->m_TEX0.TBW, 1U)) == 0) + { + if (GSUtil::GetChannelMask(dst->m_TEX0.PSM) == 0x7 && (t->m_valid_alpha_high || t->m_valid_alpha_low)) + { + t->m_valid_rgb = false; + i++; + continue; + } + + int height_adjust = ((((dst->m_end_block + 1) - t->m_TEX0.TBP0) >> 5) / std::max(t->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + + if (height_adjust < t->m_unscaled_size.y) + { + t->m_TEX0.TBP0 = dst->m_end_block + 1; + t->m_valid.w -= height_adjust; + t->ResizeValidity(t->m_valid); + + GSTexture* tex = (type == RenderTarget) ? + g_gs_device->CreateRenderTarget(t->m_texture->GetWidth(), + t->m_texture->GetHeight(), GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(t->m_texture->GetWidth(), + t->m_texture->GetHeight(), GSTexture::Format::DepthStencil, true); + if (tex) + { + g_gs_device->CopyRect(t->m_texture, tex, GSVector4i(0, height_adjust * t->m_scale, t->m_texture->GetWidth(), t->m_texture->GetHeight()), 0, 0); + if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = t; + src->m_texture = t->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + g_gs_device->Recycle(t->m_texture); + } + t->m_texture = tex; + } + } + else + { + if (src && src->m_target && src->m_from_target == t) + { + src->m_from_target = t; + src->m_texture = t->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + + t->m_texture = nullptr; + i = list.erase(j); + delete t; + } + else + { + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; + } + } + } + + } + i++; + } + } + } + return hw_clear.value_or(false); } @@ -3236,6 +3308,7 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con // Didn't find a target, check if the frame was uploaded. bool can_create = is_feedback; + GSVector2i new_size = size; if (!is_feedback && GSRendererHW::GetInstance()->m_draw_transfers.size() > 0) { @@ -3274,8 +3347,24 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con { iter = std::vector::reverse_iterator(GSRendererHW::GetInstance()->m_draw_transfers.erase(iter.base() - 1)); } - else - ++iter; + // Double buffers, usually FMV's, if checking for the upper buffer, creating another target could mess things up. + else if (GSLocalMemory::GetStartBlockAddress(iter->blit.DBP, iter->blit.DBW, iter->blit.DPSM, iter->rect) <= TEX0.TBP0 && transfer_end >= rect_end && iter->rect.width() == size.x) + { + GSTextureCache::Target* tgt = g_texture_cache->GetExactTarget(iter->blit.DBP, iter->blit.DBW, GSTextureCache::RenderTarget, iter->blit.DBP + 1); + + if (tgt) // Make this target bigger. + { + RGBAMask mask; + mask._u32 = GSUtil::GetChannelMask(iter->blit.DPSM); + tgt->UpdateValidity(iter->rect, true); + new_size.y = iter->rect.w; + tgt->ResizeTexture(new_size.x, new_size.y); + AddDirtyRectTarget(tgt, iter->rect, iter->blit.DPSM, iter->blit.DBW, mask, false); + tgt->Update(); + + return tgt; + } + } // In theory it might not be a full rect, but it should be enough to display *something*. // It's also possible we haven't saved enough of the transfers to fill the rect if the game draws the picture in lots of small transfers. @@ -3287,7 +3376,7 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con } } - return can_create ? CreateTarget(TEX0, size, size, scale, RenderTarget, true, 0, true) : nullptr; + return can_create ? CreateTarget(TEX0, new_size, new_size, scale, RenderTarget, true, 0, true) : nullptr; } void GSTextureCache::Target::ScaleRTAlpha() @@ -3638,8 +3727,27 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr for (auto i = list.begin(); i != list.end();) { Target* const t = *i; + if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 > end_bp || t->UnwrappedEndBlock() < start_bp)) + { + ++i; + continue; + } + + // If not fully contained, just dirty the area. if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) { + if (write_bw == t->m_TEX0.TBW && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[write_psm].bpp) + { + const u32 page_offset = ((end_bp - start_bp) >> 5); + const u32 end_width = write_bw * 64; + const u32 end_height = ((page_offset / std::max(write_bw, 1U)) * GSLocalMemory::m_psm[write_psm].pgs.y) + GSLocalMemory::m_psm[write_psm].pgs.y; + const GSVector4i r = GSVector4i(0, 0, end_width, end_height); + const GSVector4i invalidate_r = TranslateAlignedRectByPage(t, start_bp, write_psm, write_bw, r, false).rintersect(t->m_valid); // it is invalidation but we need a real rect. + RGBAMask mask; + mask._u32 = GSUtil::GetChannelMask(write_psm); + AddDirtyRectTarget(t, invalidate_r, t->m_TEX0.PSM, t->m_TEX0.TBW, mask, false); + } + ++i; continue; } @@ -7016,11 +7124,11 @@ void GSTextureCache::Target::Update(bool cannot_scale) { if (g_texture_cache->GetTemporaryZInfo().ZBP == m_TEX0.TBP0) { - GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo(); + const GSTextureCache::TempZAddress z_address_info = g_texture_cache->GetTemporaryZInfo(); if (m_TEX0.TBP0 == z_address_info.ZBP) { //GL_CACHE("RT in RT Updating Z copy on draw %d z_offset %d", s_n, z_address_info.offset); - GSVector4i dRect = GSVector4i(total_rect.x * m_scale, (z_address_info.offset + total_rect.y) * m_scale, (total_rect.z + (1.0f / m_scale)) * m_scale, (z_address_info.offset + total_rect.w + (1.0f / m_scale)) * m_scale); + const GSVector4i dRect = GSVector4i(total_rect.x * m_scale, (z_address_info.offset + total_rect.y) * m_scale, (total_rect.z + (1.0f / m_scale)) * m_scale, (z_address_info.offset + total_rect.w + (1.0f / m_scale)) * m_scale); g_gs_device->StretchRect(m_texture, GSVector4(total_rect.x / static_cast(m_unscaled_size.x), total_rect.y / static_cast(m_unscaled_size.y), (total_rect.z + (1.0f / m_scale)) / static_cast(m_unscaled_size.x), (total_rect.w + (1.0f / m_scale)) / static_cast(m_unscaled_size.y)), g_texture_cache->GetTemporaryZ(), GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); } @@ -7120,11 +7228,6 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); - - const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - - if (offset) - m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // Else No valid size, so need to resize down. @@ -7139,32 +7242,25 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); - const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - - if (offset) - m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); - const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - - if (offset) - m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old, bool require_new_rect, GSVector4i new_rect, bool keep_old) { - if (m_unscaled_size.x == new_unscaled_width && m_unscaled_size.y == new_unscaled_height && !require_new_rect) - return true; - const GSVector2i size = m_texture->GetSize(); const GSVector2i new_unscaled_size = GSVector2i(new_unscaled_width, new_unscaled_height); const GSVector2i new_size = ScaleRenderTargetSize(new_unscaled_size, m_scale); + + if (size.x == new_size.x && size.y == new_size.y && !require_new_rect) + return true; + const bool clear = (new_size.x > size.x || new_size.y > size.y); GSTexture* tex = m_texture->IsDepthStencil() ? @@ -7219,7 +7315,7 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca if (!keep_old) { - g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); + g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); if (recycle_old) g_gs_device->Recycle(m_texture); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 807c1280d4..db27ce41ca 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -563,7 +563,7 @@ public: GSTexture* GetTemporaryZ(); TempZAddress GetTemporaryZInfo(); void SetTemporaryZInfo(u32 address, u32 offset); - /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is. void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 249d658c7d..337e0f4b5c 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1175,7 +1175,7 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.br = C.rb; + C.br = C.rb; C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) @@ -1190,7 +1190,7 @@ struct PSMain } } } - + ps_dither(C, alpha_blend.a); // Color clamp/wrap needs to be done after sw blending and dithering diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index e4de26746b..6e66cc63cb 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 61; +static constexpr u32 SHADER_CACHE_VERSION = 62;