From faecc6913bd1d28edc3bf7ff52ccf6d57eb597da Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 17 Mar 2023 23:20:06 +1000 Subject: [PATCH] GS/HW: Texture cache improvements GS/HW: Only use temporary source for recursive draw .. and don't insert it into the page map. GS/HW: Lookup page list for depth sources GS/HW: Avoid target copies by using shader sampling GS/HW: Make texture cache a global pointer GS/HW: Remove GetID() from GSTexture It only made sense for OpenGL, was always zero in Vulkan. GS/HW: Rewrite texture sampling hazard detection Also avoid redundant channel shuffle setup. GS/HW: Turn Haunting Ground CRC into an OI fix --- bin/resources/GameIndex.yaml | 4 + bin/resources/shaders/dx11/tfx.fx | 41 +- bin/resources/shaders/opengl/tfx_fs.glsl | 31 +- bin/resources/shaders/vulkan/tfx.glsl | 39 +- pcsx2/GS/GS.cpp | 10 +- pcsx2/GS/GSCrc.cpp | 5 - pcsx2/GS/GSCrc.h | 1 - pcsx2/GS/GSState.cpp | 61 +- pcsx2/GS/GSState.h | 5 +- pcsx2/GS/Renderers/Common/GSDevice.cpp | 17 +- pcsx2/GS/Renderers/Common/GSDevice.h | 8 +- pcsx2/GS/Renderers/Common/GSTexture.h | 1 - pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 12 - pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp | 1 + pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 30 +- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 45 +- pcsx2/GS/Renderers/HW/GSHwHack.h | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 901 +++++++++++------- pcsx2/GS/Renderers/HW/GSRendererHW.h | 24 +- .../GS/Renderers/HW/GSRendererHWMultiISA.cpp | 6 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 722 +++++++------- pcsx2/GS/Renderers/HW/GSTextureCache.h | 135 +-- .../GS/Renderers/HW/GSTextureReplacements.cpp | 12 +- pcsx2/GS/Renderers/HW/GSTextureReplacements.h | 2 +- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 3 +- pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h | 2 +- pcsx2/GS/Renderers/SW/GSRendererSW.cpp | 4 +- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 25 +- 28 files changed, 1256 insertions(+), 893 deletions(-) diff --git a/bin/resources/GameIndex.yaml 
b/bin/resources/GameIndex.yaml index 3bd10d068e..ade6bd701f 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -16193,6 +16193,7 @@ SLES-52877: compat: 5 gsHWFixes: halfPixelOffset: 1 # Fixes blurriness. + beforeDraw: "OI_HauntingGround" # Fix bloom. SLES-52882: name: "Stolen" region: "PAL-M5" @@ -31346,6 +31347,7 @@ SLPM-65913: compat: 5 gsHWFixes: halfPixelOffset: 1 # Fixes blurriness. + beforeDraw: "OI_HauntingGround" # Fix bloom. SLPM-65914: name: "Nana" region: "NTSC-J" @@ -34169,6 +34171,7 @@ SLPM-66638: region: "NTSC-J" gsHWFixes: halfPixelOffset: 1 # Fixes blurriness. + beforeDraw: "OI_HauntingGround" # Fix bloom. SLPM-66639: name: "Street Fighter III - 3rd Strike [Capcom the Best]" region: "NTSC-J" @@ -46709,6 +46712,7 @@ SLUS-21075: compat: 5 gsHWFixes: halfPixelOffset: 1 # Fixes blurriness. + beforeDraw: "OI_HauntingGround" # Fix bloom. SLUS-21076: name: "Atari Anthology" region: "NTSC-U" diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index d96adb52fb..cc8dae9257 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -37,6 +37,7 @@ #define PS_LTF 1 #define PS_TCOFFSETHACK 0 #define PS_POINT_SAMPLER 0 +#define PS_REGION_RECT 0 #define PS_SHUFFLE 0 #define PS_READ_BA 0 #define PS_READ16_SRC 0 @@ -178,6 +179,8 @@ float4 sample_c(float2 uv, float uv_w) { #if PS_TEX_IS_FB == 1 return RtTexture.Load(int3(int2(uv * WH.zw), 0)); +#elif PS_REGION_RECT == 1 + return Texture.Load(int3(int2(uv), 0)); #else if (PS_POINT_SAMPLER) { @@ -241,7 +244,15 @@ float4 clamp_wrap_uv(float4 uv) if(PS_WMS == PS_WMT) { - if(PS_WMS == 2) + if(PS_REGION_RECT != 0 && PS_WMS == 0) + { + uv = frac(uv); + } + else if(PS_REGION_RECT != 0 && PS_WMS == 1) + { + uv = saturate(uv); + } + else if(PS_WMS == 2) { uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); } @@ -257,7 +268,15 @@ float4 clamp_wrap_uv(float4 uv) } else { - if(PS_WMS == 2) + if(PS_REGION_RECT != 0 && PS_WMS == 0) + { + uv.xz = 
frac(uv.xz); + } + else if(PS_REGION_RECT != 0 && PS_WMS == 1) + { + uv.xz = saturate(uv.xz); + } + else if(PS_WMS == 2) { uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); } @@ -268,7 +287,15 @@ float4 clamp_wrap_uv(float4 uv) #endif uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & asuint(MinMax.xx)) | asuint(MinMax.zz)) / tex_size.xx; } - if(PS_WMT == 2) + if(PS_REGION_RECT != 0 && PS_WMT == 0) + { + uv.yw = frac(uv.yw); + } + else if(PS_REGION_RECT != 0 && PS_WMT == 1) + { + uv.yw = saturate(uv.yw); + } + else if(PS_WMT == 2) { uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); } @@ -281,6 +308,12 @@ float4 clamp_wrap_uv(float4 uv) } } + if(PS_REGION_RECT != 0) + { + // Normalized -> Integer Coordinates. + uv = clamp(uv * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw); + } + return uv; } @@ -564,7 +597,7 @@ float4 sample_color(float2 st, float uv_w) float4x4 c; float2 dd; - if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2) + if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2) { c[0] = sample_c(st, uv_w); } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 8814ed9dba..6268662b4b 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -98,6 +98,8 @@ vec4 sample_c(vec2 uv) { #if PS_TEX_IS_FB == 1 return fetch_rt(); +#elif PS_REGION_RECT + return texelFetch(TextureSampler, ivec2(uv), 0); #else #if PS_POINT_SAMPLER @@ -163,7 +165,11 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_WMS == PS_WMT -#if PS_WMS == 2 +#if PS_REGION_RECT == 1 && PS_WMS == 0 + uv_out = fract(uv); +#elif PS_REGION_RECT == 1 && PS_WMS == 1 + uv_out = clamp(uv, vec4(0.0f), vec4(1.0f)); +#elif PS_WMS == 2 uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); #elif PS_WMS == 3 #if PS_FST == 0 @@ -176,7 +182,13 @@ vec4 clamp_wrap_uv(vec4 uv) #else // PS_WMS != PS_WMT -#if PS_WMS == 2 +#if PS_REGION_RECT == 1 && PS_WMS == 0 + 
uv_out.xz = fract(uv.xz); + +#elif PS_REGION_RECT == 1 && PS_WMS == 1 + uv_out.xz = clamp(uv.xz, vec2(0.0f), vec2(1.0f)); + +#elif PS_WMS == 2 uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); #elif PS_WMS == 3 @@ -187,7 +199,13 @@ vec4 clamp_wrap_uv(vec4 uv) #endif -#if PS_WMT == 2 +#if PS_REGION_RECT == 1 && PS_WMT == 0 + uv_out.yw = fract(uv.yw); + +#elif PS_REGION_RECT == 1 && PS_WMT == 1 + uv_out.yw = clamp(uv.yw, vec2(0.0f), vec2(1.0f)); + +#elif PS_WMT == 2 uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); #elif PS_WMT == 3 @@ -197,6 +215,11 @@ vec4 clamp_wrap_uv(vec4 uv) uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy; #endif +#endif + +#if PS_REGION_RECT == 1 + // Normalized -> Integer Coordinates. + uv_out = clamp(uv_out * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw); #endif return uv_out; @@ -473,7 +496,7 @@ vec4 sample_color(vec2 st) vec2 dd; // FIXME I'm not sure this condition is useful (I think code will be optimized) -#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2) +#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2) // No software LTF and pure 32 bits RGBA texure without special texture wrapping c[0] = sample_c(st); #ifdef TEX_COORD_DEBUG diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 2421c8eaa4..68e16f31c7 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -415,6 +415,8 @@ vec4 sample_c(vec2 uv) { #if PS_TEX_IS_FB return sample_from_rt(); +#elif PS_REGION_RECT + return texelFetch(Texture, ivec2(uv), 0); #else #if PS_POINT_SAMPLER // Weird issue with ATI/AMD cards, @@ -477,7 +479,15 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_WMS == PS_WMT { - #if PS_WMS == 2 + #if PS_REGION_RECT == 1 && PS_WMS == 0 + { + uv = fract(uv); + } + #elif PS_REGION_RECT == 1 && PS_WMS == 1 + { + uv = clamp(uv,
vec4(0.0f), vec4(1.0f)); + } + #elif PS_WMS == 2 { uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); } @@ -494,7 +504,15 @@ vec4 clamp_wrap_uv(vec4 uv) } #else { - #if PS_WMS == 2 + #if PS_REGION_RECT == 1 && PS_WMS == 0 + { + uv.xz = fract(uv.xz); + } + #elif PS_REGION_RECT == 1 && PS_WMS == 1 + { + uv.xz = clamp(uv.xz, vec2(0.0f), vec2(1.0f)); + } + #elif PS_WMS == 2 { uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); } @@ -506,7 +524,15 @@ vec4 clamp_wrap_uv(vec4 uv) uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx; } #endif - #if PS_WMT == 2 + #if PS_REGION_RECT == 1 && PS_WMT == 0 + { + uv.yw = fract(uv.yw); + } + #elif PS_REGION_RECT == 1 && PS_WMT == 1 + { + uv.yw = clamp(uv.yw, vec2(0.0f), vec2(1.0f)); + } + #elif PS_WMT == 2 { uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); } @@ -521,6 +547,11 @@ vec4 clamp_wrap_uv(vec4 uv) } #endif + #if PS_REGION_RECT == 1 + // Normalized -> Integer Coordinates. + uv = clamp(uv * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw); + #endif + return uv; } @@ -797,7 +828,7 @@ vec4 sample_color(vec2 st) mat4 c; vec2 dd; - #if PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2 + #if PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2 { c[0] = sample_c(st); } diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index f3e7204a7a..f1112c1264 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -282,7 +282,7 @@ bool GSreopen(bool recreate_display, bool recreate_renderer, const Pcsx2Config:: if (recreate_display) { g_gs_device->ResetAPIState(); - if (Host::BeginPresentFrame(false) == HostDisplay::PresentResult::OK) + if (Host::BeginPresentFrame(true) == HostDisplay::PresentResult::OK) Host::EndPresentFrame(); } @@ -643,12 +643,12 @@ void GSgetStats(std::string& info) void GSgetMemoryStats(std::string& info) { - if (GSConfig.Renderer == GSRendererType::SW || GSConfig.Renderer == GSRendererType::Null) + 
if (!g_texture_cache) return; - const u64 targets = GSRendererHW::GetInstance()->GetTextureCache()->GetTargetMemoryUsage(); - const u64 sources = GSRendererHW::GetInstance()->GetTextureCache()->GetSourceMemoryUsage(); - const u64 hashcache = GSRendererHW::GetInstance()->GetTextureCache()->GetHashCacheMemoryUsage(); + const u64 targets = g_texture_cache->GetTargetMemoryUsage(); + const u64 sources = g_texture_cache->GetSourceMemoryUsage(); + const u64 hashcache = g_texture_cache->GetHashCacheMemoryUsage(); const u64 pool = g_gs_device->GetPoolMemoryUsage(); const u64 total = targets + sources + hashcache + pool; diff --git a/pcsx2/GS/GSCrc.cpp b/pcsx2/GS/GSCrc.cpp index 48caadf09e..ea72eed33e 100644 --- a/pcsx2/GS/GSCrc.cpp +++ b/pcsx2/GS/GSCrc.cpp @@ -24,11 +24,6 @@ const CRC::Game CRC::m_games[] = { // Note: IDs 0x7ACF7E03, 0x7D4EA48F, 0x37C53760 - shouldn't be added as it's from the multiloaders when packing games. {0x00000000, NoTitle /* NoRegion */}, - {0x08C1ED4D, HauntingGround /* EU */}, - {0x2CD5794C, HauntingGround /* EU */}, - {0x867BB945, HauntingGround /* JP */}, - {0xE263BC4B, HauntingGround /* JP */}, - {0x901AAC09, HauntingGround /* US */}, {0x6F8545DB, ICO /* US */}, {0x48CDF317, ICO /* US */}, // Demo {0xB01A4C95, ICO /* JP */}, diff --git a/pcsx2/GS/GSCrc.h b/pcsx2/GS/GSCrc.h index 03fe0ea617..0568495833 100644 --- a/pcsx2/GS/GSCrc.h +++ b/pcsx2/GS/GSCrc.h @@ -24,7 +24,6 @@ public: { NoTitle, GetawayGames, - HauntingGround, ICO, KOF2002, PolyphonyDigitalGames, diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index d2e5bfb5c5..961272e1eb 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3357,7 +3357,7 @@ static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, i return sets_bits || clears_bits; } -GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear) +GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, 
bool linear, bool clamp_to_tsize) { // TODO: some of the +1s can be removed if linear == false @@ -3366,21 +3366,18 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c const int w = 1 << tw; const int h = 1 << th; - const int tw_mask = w - 1; - const int th_mask = h - 1; + const int tw_mask = (1 << tw) - 1; + const int th_mask = (1 << th) - 1; - const GSVector4i tr(0, 0, w, h); + GSVector4i tr(0, 0, w, h); const int wms = CLAMP.WMS; const int wmt = CLAMP.WMT; const int minu = (int)CLAMP.MINU; const int minv = (int)CLAMP.MINV; - - // For the FixedTEX0 case, in hardware, we handle this in the texture cache. Don't OR the bits in here, otherwise - // we'll end up with an invalid rectangle, we want the passed-in rectangle to be relative to the normalized size. - const int maxu = (wms != CLAMP_REGION_REPEAT || (int)CLAMP.MAXU < w) ? (int)CLAMP.MAXU : 0; - const int maxv = (wmt != CLAMP_REGION_REPEAT || (int)CLAMP.MAXV < h) ? (int)CLAMP.MAXV : 0; + const int maxu = (int)CLAMP.MAXU; + const int maxv = (int)CLAMP.MAXV; GSVector4i vr = tr; @@ -3391,10 +3388,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c case CLAMP_CLAMP: break; case CLAMP_REGION_CLAMP: - if (vr.x < minu) - vr.x = minu; - if (vr.z > maxu + 1) - vr.z = maxu + 1; + vr.x = minu; + vr.z = maxu + 1; break; case CLAMP_REGION_REPEAT: vr.x = maxu; @@ -3411,10 +3406,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c case CLAMP_CLAMP: break; case CLAMP_REGION_CLAMP: - if (vr.y < minv) - vr.y = minv; - if (vr.w > maxv + 1) - vr.w = maxv + 1; + vr.y = minv; + vr.w = maxv + 1; break; case CLAMP_REGION_REPEAT: vr.y = maxv; @@ -3424,6 +3417,13 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c __assume(0); } + // Software renderer fixes TEX0 so that TW/TH contain MAXU/MAXV. + // Hardware renderer doesn't, and handles it in the texture cache, so don't clamp here. 
+ if (clamp_to_tsize) + vr = vr.rintersect(tr); + else + tr = tr.runion(vr); + u8 uses_border = 0; if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX || @@ -3886,6 +3886,33 @@ GIFRegTEX0 GSState::GetTex0Layer(u32 lod) return TEX0; } +bool GSState::IsTBPFrameOrZ(u32 tbp) const +{ + GSDrawingContext* context = m_context; + const bool is_frame = (context->FRAME.Block() == tbp); + const bool is_z = (context->ZBUF.Block() == tbp); + if (!is_frame && !is_z) + return false; + + const u32 fm = context->FRAME.FBMSK; + const u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; + const u32 fm_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; + + const u32 max_z = (0xFFFFFFFF >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8)); + const bool no_rt = (context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1)) + || (!context->TEST.DATE && (context->FRAME.FBMSK & GSLocalMemory::m_psm[context->FRAME.PSM].fmsk) == GSLocalMemory::m_psm[context->FRAME.PSM].fmsk); + const bool no_ds = ( + // Depth is always pass/fail (no read) and write are discarded. + (zm != 0 && context->TEST.ZTST <= ZTST_ALWAYS) || + // Depth test will always pass + (zm != 0 && context->TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z) || + // Depth will be written through the RT + (!no_rt && context->FRAME.FBP == context->ZBUF.ZBP && !PRIM->TME && zm == 0 && (fm & fm_mask) == 0 && context->TEST.ZTE)); + + // Relying a lot on the optimizer here... I don't like it. 
+ return (is_frame && !no_rt) || (is_z && !no_ds); +} + // GSTransferBuffer GSState::GSTransferBuffer::GSTransferBuffer() diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 887527dcc8..02eb38ee57 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -199,7 +199,7 @@ protected: GSVector4i coverage; ///< Part of the texture used u8 uses_boundary; ///< Whether or not the usage touches the left, top, right, or bottom edge (and therefore needs wrap modes preserved) }; - TextureMinMaxResult GetTextureMinMax(const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear); + TextureMinMaxResult GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, bool linear, bool clamp_to_tsize); bool TryAlphaTest(u32& fm, const u32 fm_mask, u32& zm); bool IsOpaque(); bool IsMipMapDraw(); @@ -918,4 +918,7 @@ public: PRIM_OVERLAP PrimitiveOverlap(); GIFRegTEX0 GetTex0Layer(u32 lod); + + /// Returns true if the specified texture address matches the frame or Z buffer. + bool IsTBPFrameOrZ(u32 tbp) const; }; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index bca6f946c0..5f3da320e1 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -449,11 +449,22 @@ bool GSDevice::ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, { const GSTexture::Format fmt = t2 ? t2->GetFormat() : GetDefaultTextureFormat(type); const int levels = t2 ? (t2->IsMipmap() ? MipmapLevelsForSize(w, h) : 1) : 1; - delete t2; - t2 = FetchSurface(type, w, h, levels, fmt, clear, prefer_reuse); + GSTexture* new_t = FetchSurface(type, w, h, levels, fmt, clear, prefer_reuse); + if (new_t) + { + if (t2) + { + // TODO: We probably want to make this optional if we're overwriting it... 
+ const GSVector4 sRect(0, 0, 1, 1); + const GSVector4 dRect(0, 0, t2->GetWidth(), t2->GetHeight()); + StretchRect(t2, sRect, new_t, dRect, ShaderConvert::COPY, true); + Recycle(t2); + } - *t = t2; + t2 = new_t; + *t = t2; + } } return t2 != NULL; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index cd40c30b0b..6ec990e8c2 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -296,7 +296,6 @@ struct alignas(16) GSHWDrawConfig { struct { - // *** Word 1 // Format u32 aem_fmt : 2; u32 pal_fmt : 2; @@ -328,9 +327,6 @@ struct alignas(16) GSHWDrawConfig u32 write_rg : 1; u32 fbmask : 1; - //u32 _free1:0; - - // *** Word 2 // Blend and Colclip u32 blend_a : 2; u32 blend_b : 2; @@ -366,6 +362,7 @@ struct alignas(16) GSHWDrawConfig u32 automatic_lod : 1; u32 manual_lod : 1; u32 point_sampler : 1; + u32 region_rect : 1; // Scan mask u32 scanmsk : 2; @@ -862,6 +859,9 @@ public: /// Converts a colour format to an indexed format texture. virtual void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) {} + /// Converts a colour format to an indexed format texture.
+ virtual void ConvertToIndexedTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) {} + virtual void RenderHW(GSHWDrawConfig& config) {} __fi FeatureSupport Features() const { return m_features; } diff --git a/pcsx2/GS/Renderers/Common/GSTexture.h b/pcsx2/GS/Renderers/Common/GSTexture.h index 0ced7e6436..4074bee6d2 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.h +++ b/pcsx2/GS/Renderers/Common/GSTexture.h @@ -91,7 +91,6 @@ public: virtual void GenerateMipmap() {} virtual bool Save(const std::string& fn); virtual void Swap(GSTexture* tex); - virtual u32 GetID() { return 0; } __fi int GetWidth() const { return m_size.x; } __fi int GetHeight() const { return m_size.y; } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index ef332b845a..f4a3c06054 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1458,7 +1458,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) PSSetShaderResources(config.tex, config.pal); GSTexture* rt_copy = nullptr; - GSTexture* ds_copy = nullptr; if (config.require_one_barrier || (config.tex && config.tex == config.rt)) // Used as "bind rt" flag when texture barrier is unsupported { // Bind the RT.This way special effect can use it. 
@@ -1475,15 +1474,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) } } - if (config.tex && config.tex == config.ds) - { - // mainly for ico (depth buffer used as texture) - // binding to 0 here is safe, because config.tex can't equal both tex and rt - CloneTexture(config.ds, &ds_copy, config.drawarea); - if (ds_copy) - PSSetShaderResource(0, ds_copy); - } - SetupVS(config.vs, &config.cb_vs); SetupGS(config.gs); SetupPS(config.ps, &config.cb_ps, config.sampler); @@ -1556,8 +1546,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) if (rt_copy) Recycle(rt_copy); - if (ds_copy) - Recycle(ds_copy); if (primid_tex) Recycle(primid_tex); diff --git a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp index 3bc56dd9cd..e302feaa65 100644 --- a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp @@ -158,6 +158,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_LTF", sel.ltf); sm.AddMacro("PS_TCOFFSETHACK", sel.tcoffsethack); sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler); + sm.AddMacro("PS_REGION_RECT", sel.region_rect); sm.AddMacro("PS_SHUFFLE", sel.shuffle); sm.AddMacro("PS_READ_BA", sel.read_ba); sm.AddMacro("PS_READ16_SRC", sel.real16src); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 1772951a35..3953599b8f 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -1622,6 +1622,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_LTF", sel.ltf); sm.AddMacro("PS_TCOFFSETHACK", sel.tcoffsethack); sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler); + sm.AddMacro("PS_REGION_RECT", sel.region_rect); sm.AddMacro("PS_SHUFFLE", sel.shuffle); sm.AddMacro("PS_READ_BA", sel.read_ba); sm.AddMacro("PS_READ16_SRC", sel.real16src); @@ -2582,7 +2583,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) 
GSTexture12* draw_ds = static_cast(config.ds); GSTexture12* draw_rt_clone = nullptr; GSTexture12* hdr_rt = nullptr; - GSTexture12* copy_ds = nullptr; // Switch to hdr target for colclip rendering if (pipe.ps.hdr) @@ -2634,30 +2634,9 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) } } - if (config.tex) - { - if (config.tex == config.ds) - { - // requires a copy of the depth buffer. this is mainly for ico. - copy_ds = static_cast(CreateDepthStencil(rtsize.x, rtsize.y, GSTexture::Format::DepthStencil, false)); - if (copy_ds) - { - EndRenderPass(); - - GL_PUSH("Copy depth to temp texture for shuffle {%d,%d %dx%d}", - config.drawarea.left, config.drawarea.top, - config.drawarea.width(), config.drawarea.height()); - - pxAssert(copy_ds->GetState() == GSTexture::State::Invalidated); - CopyRect(config.ds, copy_ds, GSVector4i(config.ds->GetSize()).zwxy(), 0, 0); - PSSetShaderResource(0, copy_ds, true); - } - } - } // clear texture binding when it's bound to RT or DS - else if (m_tfx_textures[0] && - ((config.rt && static_cast(config.rt)->GetSRVDescriptor() == m_tfx_textures[0]) || - (config.ds && static_cast(config.ds)->GetSRVDescriptor() == m_tfx_textures[0]))) + if (((config.rt && static_cast(config.rt)->GetSRVDescriptor() == m_tfx_textures[0]) || + (config.ds && static_cast(config.ds)->GetSRVDescriptor() == m_tfx_textures[0]))) { PSSetShaderResource(0, nullptr, false); } @@ -2748,9 +2727,6 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) } } - if (copy_ds) - Recycle(copy_ds); - if (draw_rt_clone) Recycle(draw_rt_clone); diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 4424f6b60b..e4254d2302 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -110,6 +110,9 @@ bool GSHwHack::GSC_Manhunt2(GSRendererHW& r, const GSFrameInfo& fi, int& skip) bool GSHwHack::GSC_CrashBandicootWoC(GSRendererHW& r, const GSFrameInfo& fi, int& skip) { + if (s_nativeres) + return false; + // Channel effect not 
properly supported - Removes fog to fix the fog wall issue on Direct3D at any resolution, and while upscaling on every Hardware renderer. if (skip == 0) { @@ -548,6 +551,18 @@ bool GSHwHack::GSC_UrbanReign(GSRendererHW& r, const GSFrameInfo& fi, int& skip) { skip = 1; // Black shadow } + + // Urban Reign downsamples the framebuffer with page-wide columns at a time, and offsets the TBP0 forward as such, + // which would be fine, except their texture coordinates appear to be off by one. Which prevents the page translation + // from matching the last column, because it's trying to fit the last 65 columns of a 640x448 (effectively 641x448) + // texture into a 640x448 render target. + if (fi.TME && fi.TBP0 != fi.FBP && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMCT32 && + RCONTEXT->FRAME.FBW == (RCONTEXT->TEX0.TBW / 2) && RCONTEXT->CLAMP.WMS == CLAMP_REGION_CLAMP && + RCONTEXT->CLAMP.WMT == CLAMP_REGION_CLAMP && ((r.m_vt.m_max.t == GSVector4(64.0f, 448.0f)).mask() == 0x3)) + { + GL_CACHE("GSC_UrbanReign: Fix region clamp to 64 wide"); + r.m_context->CLAMP.MAXU = 63; + } } return true; @@ -810,7 +825,7 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds const u32 c = vi.RGBAQ.U32[0]; r.m_mem.WritePixel32(x, y, c, FBP, FBW); } - r.m_tc->InvalidateVideoMem(r.m_context->offset.fb, r.m_r); + g_texture_cache->InvalidateVideoMem(r.m_context->offset.fb, r.m_r); return false; } return true; @@ -981,7 +996,7 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu TEX0.TBW = RCONTEXT->FRAME.FBW; TEX0.PSM = RCONTEXT->FRAME.PSM; - if (GSTextureCache::Target* tmp_rt = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true)) + if (GSTextureCache::Target* tmp_rt = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true)) { GL_INS("OI_RozenMaidenGebetGarden FB clear"); 
g_gs_device->ClearRenderTarget(tmp_rt->m_texture, 0); @@ -999,7 +1014,7 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu TEX0.TBW = RCONTEXT->FRAME.FBW; TEX0.PSM = RCONTEXT->ZBUF.PSM; - if (GSTextureCache::Target* tmp_ds = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true)) + if (GSTextureCache::Target* tmp_ds = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true)) { GL_INS("OI_RozenMaidenGebetGarden ZB clear"); g_gs_device->ClearDepth(tmp_ds->m_texture); @@ -1032,7 +1047,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GL_INS("OI_SonicUnleashed replace draw by a copy"); - GSTextureCache::Target* src = r.m_tc->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true); const GSVector2i src_size(src->m_texture->GetSize()); GSVector2i rt_size(rt->GetSize()); @@ -1040,7 +1055,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // This is awful, but so is the CRC hack... it's a texture shuffle split horizontally instead of vertically. 
if (rt_size.x < src_size.x || rt_size.y < src_size.y) { - GSTextureCache::Target* rt_again = r.m_tc->LookupTarget(Frame, src_size, src->m_scale, GSTextureCache::RenderTarget, true); + GSTextureCache::Target* rt_again = g_texture_cache->LookupTarget(Frame, src_size, src->m_scale, GSTextureCache::RenderTarget, true); if (rt_again->m_unscaled_size.x < src->m_unscaled_size.x || rt_again->m_unscaled_size.y < src->m_unscaled_size.y) { rt_again->ResizeTexture(std::max(rt_again->m_unscaled_size.x, src->m_unscaled_size.x), @@ -1122,7 +1137,7 @@ bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, const GSFrameInfo& fi, int& ski GIFRegTEX0 TEX0 = {}; TEX0.TBP0 = fi.FBP; TEX0.TBW = 8; - GSTextureCache::Target* dst = r.m_tc->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true); + GSTextureCache::Target* dst = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil, true); if (dst) { g_gs_device->ClearDepth(dst->m_texture); @@ -1144,10 +1159,25 @@ bool GSHwHack::OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GS g_gs_device->CopyRect(t->m_texture, rt, rc, 0, 0); } - r.m_tc->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporarySource(); return false; } +bool GSHwHack::OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + // Haunting Ground clears two targets by doing a 256x448 direct colour write at 0x3000, covering a target at 0x3380. + // This currently isn't handled in our HLE clears, so we need to manually remove the other target. + if (rt && !ds && !t && r.IsConstantDirectWriteMemClear(true)) + { + GL_CACHE("GSHwHack::OI_HauntingGround()"); + g_texture_cache->InvalidateVideoMemTargets(GSTextureCache::RenderTarget, RCONTEXT->FRAME.Block(), + RCONTEXT->FRAME.FBW, RCONTEXT->FRAME.PSM, r.m_r); + } + + // Not skipping anything. This is just an invalidation hack. 
+ return true; +} + #undef RCONTEXT #undef RPRIM @@ -1224,6 +1254,7 @@ const GSHwHack::Entry GSHwHack::s_before_draw_functions[] CRC_F(OI_ArTonelico2, CRCHackLevel::Minimum), CRC_F(OI_BurnoutGames, CRCHackLevel::Minimum), CRC_F(OI_Battlefield2, CRCHackLevel::Minimum), + CRC_F(OI_HauntingGround, CRCHackLevel::Minimum) }; #undef CRC_F diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.h b/pcsx2/GS/Renderers/HW/GSHwHack.h index 2b3a3f7766..e6be84366e 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.h +++ b/pcsx2/GS/Renderers/HW/GSHwHack.h @@ -62,8 +62,8 @@ public: static bool OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); static bool OI_BurnoutGames(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); - static bool OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); + static bool OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); template struct Entry diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b017cf4d08..5f693b1c6a 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -23,13 +23,14 @@ GSRendererHW::GSRendererHW() : GSRenderer() - , m_tc(new GSTextureCache()) { MULTI_ISA_SELECT(GSRendererHWPopulateFunctions)(*this); m_mipmap = (GSConfig.HWMipmap >= HWMipmapLevel::Basic); SetTCOffset(); - GSTextureReplacements::Initialize(m_tc); + pxAssert(!g_texture_cache); + g_texture_cache = std::make_unique(); + GSTextureReplacements::Initialize(); // Hope nothing requires too many draw calls. 
m_drawlist.reserve(2048); @@ -48,68 +49,29 @@ void GSRendererHW::SetTCOffset() GSRendererHW::~GSRendererHW() { - delete m_tc; + g_texture_cache.reset(); } void GSRendererHW::Destroy() { - m_tc->RemoveAll(); + g_texture_cache->RemoveAll(); GSTextureReplacements::Shutdown(); GSRenderer::Destroy(); } void GSRendererHW::PurgeTextureCache() { - m_tc->RemoveAll(); + g_texture_cache->RemoveAll(); } void GSRendererHW::ReadbackTextureCache() { - m_tc->ReadbackAll(); + g_texture_cache->ReadbackAll(); } GSTexture* GSRendererHW::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size) { - return m_tc->LookupPaletteSource(CBP, CPSM, CBW, offset, scale, size); -} - -bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) -{ - if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier) - return false; - - // Texture is actually the frame buffer. Stencil emulation to compute shadow (Jak series/tri-ace game) - // Will hit the "m_ps_sel.tex_is_fb = 1" path in the draw - if (m_vt.m_primclass == GS_TRIANGLE_CLASS) - { - if (m_context->FRAME.FBMSK == 0x00FFFFFF && TEX0.TBP0 == m_context->FRAME.Block()) - m_tex_is_fb = true; - } - else if (m_vt.m_primclass == GS_SPRITE_CLASS) - { - if (TEX0.TBP0 == m_context->FRAME.Block()) - { - m_tex_is_fb = IsPossibleTextureShuffle(dst, TEX0); - - if (!m_tex_is_fb && !m_vt.IsLinear()) - { - // Make sure that we're not sampling away from the area we're rendering. - // We need to take the absolute here, because Beyond Good and Evil undithers itself using a -1,-1 offset. 
- const GSVector4 diff(m_vt.m_min.p.xyxy(m_vt.m_max.p) - m_vt.m_min.t.xyxy(m_vt.m_max.t)); - if ((diff.abs() < GSVector4(1.0f)).alltrue()) - m_tex_is_fb = true; - } - } - } - - return m_tex_is_fb; -} - -bool GSRendererHW::IsPossibleTextureShuffle(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) const -{ - return (PRIM->TME && m_vt.m_primclass == GS_SPRITE_CLASS && - dst->m_32_bits_fmt && GSLocalMemory::m_psm[TEX0.PSM].bpp == 16 && - GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16); + return g_texture_cache->LookupPaletteSource(CBP, CPSM, CBW, offset, scale, size); } void GSRendererHW::SetGameCRC(u32 crc) @@ -134,7 +96,7 @@ void GSRendererHW::Reset(bool hardware_reset) // Force targets to preload for 2 frames (for 30fps games). static constexpr u8 TARGET_PRELOAD_FRAMES = 2; - m_tc->RemoveAll(); + g_texture_cache->RemoveAll(); m_force_preload = TARGET_PRELOAD_FRAMES; GSRenderer::Reset(hardware_reset); @@ -179,7 +141,7 @@ void GSRendererHW::VSync(u32 field, bool registers_written) } else { - m_tc->IncAge(); + g_texture_cache->IncAge(); } m_last_draw_n = s_n + 1; // +1 for vsync @@ -187,11 +149,11 @@ void GSRendererHW::VSync(u32 field, bool registers_written) GSRenderer::VSync(field, registers_written); - if (m_tc->GetHashCacheMemoryUsage() > 1024 * 1024 * 1024) + if (g_texture_cache->GetHashCacheMemoryUsage() > 1024 * 1024 * 1024) { Host::AddKeyedFormattedOSDMessage("HashCacheOverflow", Host::OSD_ERROR_DURATION, "Hash cache has used %.2f MB of VRAM, disabling.", - static_cast(m_tc->GetHashCacheMemoryUsage()) / 1048576.0f); - m_tc->RemoveAll(); + static_cast(g_texture_cache->GetHashCacheMemoryUsage()) / 1048576.0f); + g_texture_cache->RemoveAll(); g_gs_device->PurgePool(); GSConfig.TexturePreloading = TexturePreloadingLevel::Partial; } @@ -217,7 +179,7 @@ GSTexture* GSRendererHW::GetOutput(int i, float& scale, int& y_offset) TEX0.TBW = curFramebuffer.FBW; TEX0.PSM = curFramebuffer.PSM; - if (GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, 
framebufferSize, GetTextureScaleFactor())) + if (GSTextureCache::Target* rt = g_texture_cache->LookupDisplayTarget(TEX0, framebufferSize, GetTextureScaleFactor())) { rt->Update(false); t = rt->m_texture; @@ -256,7 +218,7 @@ GSTexture* GSRendererHW::GetFeedbackOutput(float& scale) TEX0.TBW = m_regs->EXTBUF.EXBW; TEX0.PSM = PCRTCDisplays.PCRTCDisplays[index].PSM; - GSTextureCache::Target* rt = m_tc->LookupDisplayTarget(TEX0, fb_size, GetTextureScaleFactor()); + GSTextureCache::Target* rt = g_texture_cache->LookupDisplayTarget(TEX0, fb_size, GetTextureScaleFactor()); if (!rt) return nullptr; @@ -796,7 +758,7 @@ GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex) GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16 && (tex->m_32_bits_fmt || (m_context->TEX0.TBP0 != m_context->FRAME.Block() && IsOpaque() && !(m_context->TEX1.MMIN & 1) && - m_context->FRAME.FBMSK && m_tc->Has32BitTarget(m_context->FRAME.Block())))); + m_context->FRAME.FBMSK && g_texture_cache->Has32BitTarget(m_context->FRAME.Block())))); if (possible_texture_shuffle) { const u32 tex_width_pgs = (tex->m_target ? 
tex->m_from_target_TEX0.TBW : tex->m_TEX0.TBW); @@ -815,7 +777,7 @@ GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex) } } - u32 height = m_tc->GetTargetHeight(m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, min_height); + u32 height = g_texture_cache->GetTargetHeight(m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, min_height); GL_INS("Target size for %x %u %u: %ux%u", m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, width, height); @@ -969,7 +931,7 @@ void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS } if (loop_h || loop_w) { - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); + g_texture_cache->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); if (loop_h) { rect.y = 0; @@ -980,10 +942,10 @@ void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS rect.x = 0; rect.z = r.w - 2048; } - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); + g_texture_cache->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), rect, eewrite); } else - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite); + g_texture_cache->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r, eewrite); } void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut) @@ -1003,13 +965,14 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS if (!(iter->draw == s_n && BITBLTBUF.SBP == iter->blit.DBP && iter->blit.DPSM == BITBLTBUF.SPSM && r.eq(iter->rect))) continue; - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); + + g_texture_cache->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.SBP, 
BITBLTBUF.SBW, BITBLTBUF.SPSM), r); skip = true; break; } if(!skip) - m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); + g_texture_cache->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r); } void GSRendererHW::Move() @@ -1022,7 +985,7 @@ void GSRendererHW::Move() const int w = m_env.TRXREG.RRW; const int h = m_env.TRXREG.RRH; - if (m_tc->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, + if (g_texture_cache->Move(m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, sx, sy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, dx, dy, w, h)) { // Handled entirely in TC, no need to update local memory. @@ -1141,10 +1104,10 @@ void GSRendererHW::SwSpriteRender() const u8 alpha_fix = m_context->ALPHA.FIX; if (texture_mapping_enabled) - m_tc->InvalidateLocalMem(spo, GSVector4i(sx, sy, sx + w, sy + h)); + g_texture_cache->InvalidateLocalMem(spo, GSVector4i(sx, sy, sx + w, sy + h)); constexpr bool invalidate_local_mem_before_fb_read = false; if (invalidate_local_mem_before_fb_read && (alpha_blending_enabled || fb_mask_enabled)) - m_tc->InvalidateLocalMem(dpo, m_r); + g_texture_cache->InvalidateLocalMem(dpo, m_r); for (int y = 0; y < h; y++, ++sy, ++dy) { @@ -1227,7 +1190,7 @@ void GSRendererHW::SwSpriteRender() } } - m_tc->InvalidateVideoMem(dpo, m_r); + g_texture_cache->InvalidateVideoMem(dpo, m_r); } bool GSRendererHW::CanUseSwSpriteRender() @@ -1603,12 +1566,8 @@ void GSRendererHW::Draw() m_channel_shuffle = false; } - GIFRegTEX0 TEX0 = {}; - - m_src = nullptr; m_texture_shuffle = false; m_copy_16bit_to_target_shuffle = false; - m_tex_is_fb = false; const bool is_split_texture_shuffle = (m_split_texture_shuffle_pages > 0); if (is_split_texture_shuffle) @@ -1635,6 +1594,20 @@ void GSRendererHW::Draw() m_context->TEX0.TBW, m_context->TEX0.PSM); } + const auto cleanup_cancelled_draw = [&]() { + // Remove any RT source. 
+ g_texture_cache->InvalidateTemporarySource(); + + // Restore offsets. + if (is_split_texture_shuffle) + { + m_context->offset.fb = GSOffset(GSLocalMemory::m_psm[m_context->FRAME.PSM].info, m_context->FRAME.Block(), + m_context->FRAME.FBW, m_context->FRAME.PSM); + m_context->offset.tex = GSOffset(GSLocalMemory::m_psm[m_context->TEX0.PSM].info, m_context->TEX0.TBP0, + m_context->TEX0.TBW, m_context->TEX0.PSM); + } + }; + if (!GSConfig.UserHacks_DisableSafeFeatures) { if (IsConstantDirectWriteMemClear(true)) @@ -1674,20 +1647,24 @@ void GSRendererHW::Draw() if (is_zero_clear && OI_GsMemClear() && clear_height_valid) { - m_tc->InvalidateVideoMem(context->offset.fb, m_r, false, true); - m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->FRAME.Block()); + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false, true); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->FRAME.Block()); if (m_context->ZBUF.ZMSK == 0) { - m_tc->InvalidateVideoMem(context->offset.zb, m_r, false, false); - m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->ZBUF.Block()); + g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false, false); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->ZBUF.Block()); } return; } } } + + GIFRegTEX0 TEX0 = {}; + GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; + const bool process_texture = PRIM->TME && !(PRIM->ABE && m_context->ALPHA.IsBlack() && !m_context->TEX0.TCC); // Disable texture mapping if the blend is black and using alpha from vertex. if (process_texture) @@ -1780,7 +1757,7 @@ void GSRendererHW::Draw() m_context->offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear()); + tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear(), false); // Snowblind games set TW/TH to 1024, and use UVs for smaller textures inside that. 
// Such textures usually contain junk in local memory, so try to make them smaller based on UVs. @@ -1805,11 +1782,12 @@ void GSRendererHW::Draw() MIP_CLAMP.MAXV + 1); } - m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage) : - m_tc->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr); + src = tex_psm.depth ? g_texture_cache->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage) : + g_texture_cache->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr); } - const GSVector2i t_size = GetTargetSize(m_src); + // Estimate size based on the scissor rectangle and height cache. + const GSVector2i t_size = GetTargetSize(src); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. m_r = m_r.rintersect(GSVector4i::loadh(t_size)); @@ -1827,11 +1805,12 @@ void GSRendererHW::Draw() // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && m_vertex.next == 2; - rt = m_tc->LookupTarget(FRAME_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, fm, false, force_preload, IsConstantDirectWriteMemClear(false) && is_square); + rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, fm, false, force_preload, IsConstantDirectWriteMemClear(false) && is_square); // Draw skipped because it was a clear and there was no target. 
if (!rt) { + cleanup_cancelled_draw(); OI_GsMemClear(); return; } @@ -1846,7 +1825,7 @@ void GSRendererHW::Draw() ZBUF_TEX0.TBW = context->FRAME.FBW; ZBUF_TEX0.PSM = context->ZBUF.PSM; - ds = m_tc->LookupTarget(ZBUF_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, force_preload); + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, GetTextureScaleFactor(), GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, force_preload); } if (process_texture) @@ -1857,7 +1836,7 @@ void GSRendererHW::Draw() { // copy of a 16bit source in to this target, make sure it's opaque and not bilinear to reduce false positives. m_copy_16bit_to_target_shuffle = context->TEX0.TBP0 != context->FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque() - && !(context->TEX1.MMIN & 1) && !m_src->m_32_bits_fmt && context->FRAME.FBMSK; + && !(context->TEX1.MMIN & 1) && !src->m_32_bits_fmt && context->FRAME.FBMSK; } // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. @@ -1865,7 +1844,7 @@ void GSRendererHW::Draw() // // Both input and output are 16 bits and texture was initially 32 bits! m_texture_shuffle = (GSLocalMemory::m_psm[context->FRAME.PSM].bpp == 16) && (tex_psm.bpp == 16) - && draw_sprite_tex && (m_src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle); + && draw_sprite_tex && (src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle); // Okami mustn't call this code if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_context->FRAME.FBMSK & fm_mask) == 0)) @@ -1896,15 +1875,16 @@ void GSRendererHW::Draw() // If TEX0 == FBP, we're going to have a source left in the TC. // That source will get used in the actual draw unsafely, so kick it out. 
if (m_context->FRAME.Block() == m_context->TEX0.TBP0) - m_tc->InvalidateVideoMem(context->offset.fb, m_r, false, false); + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false, false); + cleanup_cancelled_draw(); return; } // Texture shuffle is not yet supported with strange clamp mode ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); - if (m_src->m_target && IsPossibleChannelShuffle()) + if (src->m_target && IsPossibleChannelShuffle()) { GL_INS("Channel shuffle effect detected (2nd shot)"); m_channel_shuffle = true; @@ -1940,7 +1920,8 @@ void GSRendererHW::Draw() // If m_src is from a target that isn't the same size as the texture, texture sample edge modes won't work quite the same way // If the game actually tries to access stuff outside of the rendered target, it was going to get garbage anyways so whatever // But the game could issue reads that wrap to valid areas, so move wrapping to the shader if wrapping is used - const GSVector2i unscaled_size = m_src->GetUnscaledSize(); + const GSVector2i unscaled_size = src->m_target ? 
src->GetRegionSize() : src->GetUnscaledSize(); + if (!is_shuffle && m_context->CLAMP.WMS == CLAMP_REPEAT && (tmm.uses_boundary & TextureMinMaxResult::USES_BOUNDARY_U) && unscaled_size.x != tw) { // Our shader-emulated region repeat doesn't upscale :( @@ -1973,7 +1954,7 @@ void GSRendererHW::Draw() } // Round 2 - if (IsMipMapActive() && GSConfig.HWMipmap == HWMipmapLevel::Full && !tex_psm.depth && !m_src->m_from_hash_cache) + if (IsMipMapActive() && GSConfig.HWMipmap == HWMipmapLevel::Full && !tex_psm.depth && !src->m_from_hash_cache) { // Upload remaining texture layers const GSVector4 tmin = m_vt.m_min.t; @@ -1993,13 +1974,13 @@ void GSRendererHW::Draw() m_vt.m_min.t *= 0.5f; m_vt.m_max.t *= 0.5f; - tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear()); + tmm = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, m_vt.IsLinear(), false); - m_src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x); + src->UpdateLayer(MIP_TEX0, tmm.coverage, layer - m_lod.x); } // we don't need to generate mipmaps since they were provided - m_src->m_texture->ClearMipmapGenerationFlag(); + src->m_texture->ClearMipmapGenerationFlag(); m_vt.m_min.t = tmin; m_vt.m_max.t = tmax; } @@ -2078,7 +2059,7 @@ void GSRendererHW::Draw() // Grandia Xtreme, Onimusha Warlord. 
if (!new_rect && new_height && old_end_block != rt->m_end_block) { - old_rt = m_tc->FindTargetOverlap(old_end_block, rt->m_end_block, GSTextureCache::RenderTarget, context->FRAME.PSM); + old_rt = g_texture_cache->FindTargetOverlap(old_end_block, rt->m_end_block, GSTextureCache::RenderTarget, context->FRAME.PSM); if (old_rt && old_rt != rt && GSUtil::HasSharedBits(old_rt->m_TEX0.PSM, rt->m_TEX0.PSM)) { @@ -2116,7 +2097,7 @@ void GSRendererHW::Draw() if (!new_rect && new_height && old_end_block != ds->m_end_block) { - old_ds = m_tc->FindTargetOverlap(old_end_block, ds->m_end_block, GSTextureCache::DepthStencil, context->ZBUF.PSM); + old_ds = g_texture_cache->FindTargetOverlap(old_end_block, ds->m_end_block, GSTextureCache::DepthStencil, context->ZBUF.PSM); if (old_ds && old_ds != ds && GSUtil::HasSharedBits(old_ds->m_TEX0.PSM, ds->m_TEX0.PSM)) { @@ -2133,10 +2114,10 @@ void GSRendererHW::Draw() } } - if (m_src && m_src->m_shared_texture && m_src->m_texture != *m_src->m_from_target) + if (src && src->m_shared_texture && src->m_texture != src->m_from_target->m_texture) { // Target texture changed, update reference. 
- m_src->m_texture = *m_src->m_from_target; + src->m_texture = src->m_from_target->m_texture; } if (GSConfig.DumpGSData) @@ -2145,7 +2126,7 @@ void GSRendererHW::Draw() std::string s; - if (GSConfig.SaveTexture && s_n >= GSConfig.SaveN && m_src) + if (GSConfig.SaveTexture && s_n >= GSConfig.SaveN && src) { s = GetDrawDumpPath("%05d_f%lld_itex_%05x_%s_%d%d_%02x_%02x_%02x_%02x.dds", s_n, frame, static_cast(context->TEX0.TBP0), psm_str(context->TEX0.PSM), @@ -2153,13 +2134,13 @@ void GSRendererHW::Draw() static_cast(context->CLAMP.MINU), static_cast(context->CLAMP.MAXU), static_cast(context->CLAMP.MINV), static_cast(context->CLAMP.MAXV)); - m_src->m_texture->Save(s); + src->m_texture->Save(s); - if (m_src->m_palette) + if (src->m_palette) { s = GetDrawDumpPath("%05d_f%lld_itpx_%05x_%s.dds", s_n, frame, context->TEX0.CBP, psm_str(context->TEX0.CPSM)); - m_src->m_palette->Save(s); + src->m_palette->Save(s); } } @@ -2180,15 +2161,17 @@ void GSRendererHW::Draw() } } - if (m_oi && !m_oi(*this, rt ? rt->m_texture : nullptr, ds ? ds->m_texture : nullptr, m_src)) + if (m_oi && !m_oi(*this, rt ? rt->m_texture : nullptr, ds ? ds->m_texture : nullptr, src)) { GL_INS("Warning skipping a draw call (%d)", s_n); + cleanup_cancelled_draw(); return; } - if (!OI_BlitFMV(rt, m_src, m_r)) + if (!OI_BlitFMV(rt, src, m_r)) { GL_INS("Warning skipping a draw call (%d)", s_n); + cleanup_cancelled_draw(); return; } @@ -2249,20 +2232,20 @@ void GSRendererHW::Draw() // - DrawPrims(rt, ds, m_src); + DrawPrims(rt, ds, src, tmm); // // Temporary source *must* be invalidated before normal, because otherwise it'll be double freed. - m_tc->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporarySource(); // // Invalidation of old targets when changing to double-buffering. 
if (old_rt) - m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, old_rt->m_TEX0.TBP0); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, old_rt->m_TEX0.TBP0); if (old_ds) - m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, old_ds->m_TEX0.TBP0); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, old_ds->m_TEX0.TBP0); // @@ -2274,9 +2257,9 @@ void GSRendererHW::Draw() rt->UpdateValidBits(~fm & fm_mask); - m_tc->InvalidateVideoMem(context->offset.fb, m_r, false, false); + g_texture_cache->InvalidateVideoMem(context->offset.fb, m_r, false, false); - m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); } if (zm != 0xffffffff && ds) @@ -2287,16 +2270,14 @@ void GSRendererHW::Draw() ds->UpdateValidBits(GSLocalMemory::m_psm[context->ZBUF.PSM].fmsk); - m_tc->InvalidateVideoMem(context->offset.zb, m_r, false, false); + g_texture_cache->InvalidateVideoMem(context->offset.zb, m_r, false, false); - m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); + g_texture_cache->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); } // Restore modified offsets. 
if (is_split_texture_shuffle) { - m_context->FRAME.FBP = m_context->stack.FRAME.FBP; - m_context->TEX0.TBP0 = m_context->stack.TEX0.TBP0; m_context->offset.fb = GSOffset(GSLocalMemory::m_psm[m_context->FRAME.PSM].info, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM); m_context->offset.tex = GSOffset(GSLocalMemory::m_psm[m_context->TEX0.PSM].info, m_context->TEX0.TBP0, @@ -2335,7 +2316,7 @@ void GSRendererHW::Draw() #ifdef DISABLE_HW_TEXTURE_CACHE if (rt) - m_tc->Read(rt, m_r); + g_texture_cache->Read(rt, m_r); #endif } @@ -2742,159 +2723,197 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask() } } -void GSRendererHW::EmulateChannelShuffle(const GSTextureCache::Source* tex) +bool GSRendererHW::TestChannelShuffle(GSTextureCache::Target* src) { - // Uncomment to disable HLE emulation (allow to trace the draw call) - // m_channel_shuffle = false; + // We have to do the second test early here, because it might be a different source. + const bool shuffle = m_channel_shuffle || ( + PRIM->TME && m_context->TEX0.PSM == PSM_PSMT8 && // 8-bit texture draw + m_vt.m_primclass == GS_SPRITE_CLASS && // draw_sprite_tex + (((m_vt.m_max.p - m_vt.m_min.p) <= GSVector4(64.0f)).mask() & 0x3) == 0x3); // single_page + // This is a little redundant since it'll get called twice, but the only way to stop us wasting time on copies. 
+ return (shuffle && EmulateChannelShuffle(src, true)); +} + +__ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only) +{ // First let's check we really have a channel shuffle effect - if (m_channel_shuffle) + if (m_game.title == CRC::PolyphonyDigitalGames) { - if (m_game.title == CRC::PolyphonyDigitalGames) + GL_INS("Gran Turismo RGB Channel"); + if (test_only) + return true; + + m_conf.ps.channel = ChannelFetch_RGB; + m_context->TEX0.TFX = TFX_DECAL; + m_conf.rt = src->m_texture; + } + else if (m_game.title == CRC::Tekken5) + { + if (m_context->FRAME.FBW == 1) { - GL_INS("Gran Turismo RGB Channel"); + // Used in stages: Secret Garden, Acid Rain, Moonlit Wilderness + GL_INS("Tekken5 RGB Channel"); + if (test_only) + return true; + m_conf.ps.channel = ChannelFetch_RGB; - m_context->TEX0.TFX = TFX_DECAL; - m_conf.rt = *tex->m_from_target; - } - else if (m_game.title == CRC::Tekken5) - { - if (m_context->FRAME.FBW == 1) - { - // Used in stages: Secret Garden, Acid Rain, Moonlit Wilderness - GL_INS("Tekken5 RGB Channel"); - m_conf.ps.channel = ChannelFetch_RGB; - m_context->FRAME.FBMSK = 0xFF000000; - // 12 pages: 2 calls by channel, 3 channels, 1 blit - // Minus current draw call - m_skip = 12 * (3 + 3 + 1) - 1; - m_conf.rt = *tex->m_from_target; - } - else - { - // Could skip model drawing if wrongly detected - m_channel_shuffle = false; - } - } - else if ((tex->m_texture->GetType() == GSTexture::Type::DepthStencil) && !(tex->m_32_bits_fmt)) - { - // So far 2 games hit this code path. 
Urban Chaos and Tales of Abyss - // UC: will copy depth to green channel - // ToA: will copy depth to alpha channel - if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000) - { - // Green channel is masked - GL_INS("Tales Of Abyss Crazyness (MSB 16b depth to Alpha)"); - m_conf.ps.tales_of_abyss_hle = 1; - } - else - { - GL_INS("Urban Chaos Crazyness (Green extraction)"); - m_conf.ps.urban_chaos_hle = 1; - } - } - else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3) - { - // Blood will tell. I think it is channel effect too but again - // implemented in a different way. I don't want to add more CRC stuff. So - // let's disable channel when the signature is different - // - // Note: Tales Of Abyss and Tekken5 could hit this path too. Those games are - // handled above. - GL_INS("Maybe not a channel!"); - m_channel_shuffle = false; - } - else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8)) - { - // Read either blue or Alpha. Let's go for Blue ;) - // MGS3/Kill Zone - GL_INS("Blue channel"); - m_conf.ps.channel = ChannelFetch_BLUE; - } - else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0)) - { - // Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so - // red. 
2-3 is likely bottom so green (actually depends on texture base pointer offset) - const bool green = PRIM->FST && (m_vertex.buff[0].V & 32); - if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) - { - // Typically used in Terminator 3 - const int blue_mask = m_context->FRAME.FBMSK >> 24; - int blue_shift = -1; - - // Note: potentially we could also check the value of the clut - switch (blue_mask) - { - case 0xFF: ASSERT(0); break; - case 0xFE: blue_shift = 1; break; - case 0xFC: blue_shift = 2; break; - case 0xF8: blue_shift = 3; break; - case 0xF0: blue_shift = 4; break; - case 0xE0: blue_shift = 5; break; - case 0xC0: blue_shift = 6; break; - case 0x80: blue_shift = 7; break; - default: break; - } - - if (blue_shift >= 0) - { - const int green_mask = ~blue_mask & 0xFF; - const int green_shift = 8 - blue_shift; - - GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift); - m_conf.cb_ps.ChannelShuffle = GSVector4i(blue_mask, blue_shift, green_mask, green_shift); - m_conf.ps.channel = ChannelFetch_GXBY; - m_context->FRAME.FBMSK = 0x00FFFFFF; - } - else - { - GL_INS("Green channel (wrong mask) (fbmask %x)", blue_mask); - m_conf.ps.channel = ChannelFetch_GREEN; - } - } - else if (green) - { - GL_INS("Green channel"); - m_conf.ps.channel = ChannelFetch_GREEN; - } - else - { - // Pop - GL_INS("Red channel"); - m_conf.ps.channel = ChannelFetch_RED; - } + m_context->FRAME.FBMSK = 0xFF000000; + // 12 pages: 2 calls by channel, 3 channels, 1 blit + // Minus current draw call + m_skip = 12 * (3 + 3 + 1) - 1; + m_conf.rt = src->m_texture; } else { - GL_INS("Channel not supported"); + // Could skip model drawing if wrongly detected + if (test_only) + return false; + m_channel_shuffle = false; } } + else if ((src->m_texture->GetType() == GSTexture::Type::DepthStencil) && !src->m_32_bits_fmt) + { + // So far 2 games hit this code path. 
Urban Chaos and Tales of Abyss + // UC: will copy depth to green channel + // ToA: will copy depth to alpha channel + if ((m_context->FRAME.FBMSK & 0xFF0000) == 0xFF0000) + { + // Green channel is masked + GL_INS("Tales Of Abyss Crazyness (MSB 16b depth to Alpha)"); + if (test_only) + return true; + + m_conf.ps.tales_of_abyss_hle = 1; + } + else + { + GL_INS("Urban Chaos Crazyness (Green extraction)"); + if (test_only) + return true; + + m_conf.ps.urban_chaos_hle = 1; + } + } + else if (m_index.tail <= 64 && m_context->CLAMP.WMT == 3) + { + // Blood will tell. I think it is channel effect too but again + // implemented in a different way. I don't want to add more CRC stuff. So + // let's disable channel when the signature is different + // + // Note: Tales Of Abyss and Tekken5 could hit this path too. Those games are + // handled above. + GL_INS("Maybe not a channel!"); + if (test_only) + return false; + + m_channel_shuffle = false; + } + else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MAXU & 0x8) == 8)) + { + // Read either blue or Alpha. Let's go for Blue ;) + // MGS3/Kill Zone + GL_INS("Blue channel"); + if (test_only) + return true; + + m_conf.ps.channel = ChannelFetch_BLUE; + } + else if (m_context->CLAMP.WMS == 3 && ((m_context->CLAMP.MINU & 0x8) == 0)) + { + // Read either Red or Green. Let's check the V coordinate. 0-1 is likely top so + // red. 
2-3 is likely bottom so green (actually depends on texture base pointer offset) + const bool green = PRIM->FST && (m_vertex.buff[0].V & 32); + if (green && (m_context->FRAME.FBMSK & 0x00FFFFFF) == 0x00FFFFFF) + { + // Typically used in Terminator 3 + const int blue_mask = m_context->FRAME.FBMSK >> 24; + int blue_shift = -1; + + // Note: potentially we could also check the value of the clut + switch (blue_mask) + { + case 0xFF: ASSERT(0); break; + case 0xFE: blue_shift = 1; break; + case 0xFC: blue_shift = 2; break; + case 0xF8: blue_shift = 3; break; + case 0xF0: blue_shift = 4; break; + case 0xE0: blue_shift = 5; break; + case 0xC0: blue_shift = 6; break; + case 0x80: blue_shift = 7; break; + default: break; + } + + if (blue_shift >= 0) + { + const int green_mask = ~blue_mask & 0xFF; + const int green_shift = 8 - blue_shift; + + GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift); + if (test_only) + return true; + + m_conf.cb_ps.ChannelShuffle = GSVector4i(blue_mask, blue_shift, green_mask, green_shift); + m_conf.ps.channel = ChannelFetch_GXBY; + m_context->FRAME.FBMSK = 0x00FFFFFF; + } + else + { + GL_INS("Green channel (wrong mask) (fbmask %x)", blue_mask); + if (test_only) + return true; + + m_conf.ps.channel = ChannelFetch_GREEN; + } + } + else if (green) + { + GL_INS("Green channel"); + if (test_only) + return true; + + m_conf.ps.channel = ChannelFetch_GREEN; + } + else + { + // Pop + GL_INS("Red channel"); + if (test_only) + return true; + + m_conf.ps.channel = ChannelFetch_RED; + } + } + else + { + GL_INS("Channel not supported"); + if (test_only) + return false; + + m_channel_shuffle = false; + } + + if (!m_channel_shuffle) + return false; // Effect is really a channel shuffle effect so let's cheat a little - if (m_channel_shuffle) - { - m_conf.tex = *tex->m_from_target; - if (m_conf.tex) - { - // Identify when we're sampling the current buffer, defer fixup for later. 
- m_tex_is_fb |= (m_conf.tex == m_conf.rt || m_conf.tex == m_conf.ds); - } + m_conf.tex = src->m_texture; - // Replace current draw with a fullscreen sprite - // - // Performance GPU note: it could be wise to reduce the size to - // the rendered size of the framebuffer + // Replace current draw with a fullscreen sprite + // + // Performance GPU note: it could be wise to reduce the size to + // the rendered size of the framebuffer - GSVertex* s = &m_vertex.buff[0]; - s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); - s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); - s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); - s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); + GSVertex* s = &m_vertex.buff[0]; + s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); + s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 16384); + s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 0); + s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + 16384); - m_vertex.head = m_vertex.tail = m_vertex.next = 2; - m_index.tail = 2; - } + m_vertex.head = m_vertex.tail = m_vertex.next = 2; + m_index.tail = 2; + return true; } void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass) @@ -3500,8 +3519,54 @@ __ri static constexpr u8 EffectiveClamp(u8 clamp, bool has_region) return (clamp >= CLAMP_REGION_CLAMP && has_region) ? 
(clamp ^ 3) : clamp; } -void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) +__ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds, GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTexture*& src_copy) { + // don't overwrite the texture when using channel shuffle, but keep the palette + if (!m_channel_shuffle) + m_conf.tex = tex->m_texture; + m_conf.pal = tex->m_palette; + + if (m_game.title == CRC::ICO) + { + const GSVertex* v = &m_vertex.buff[0]; + const GSVideoMode mode = GetVideoMode(); + if (tex && m_vt.m_primclass == GS_SPRITE_CLASS && m_vertex.next == 2 && PRIM->ABE && // Blend texture + ((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448 + (v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512 + tex->m_TEX0.PSM == PSM_PSMT8H) // i.e. read the alpha channel of a 32 bits texture + { + // Note potentially we can limit to TBP0:0x2800 + + // Depth buffer was moved so GS will invalide it which means a + // downscale. ICO uses the MSB depth bits as the texture alpha + // channel. However this depth of field effect requires + // texel:pixel mapping accuracy. + // + // Use an HLE shader to sample depth directly as the alpha channel + GL_INS("ICO sample depth as alpha"); + m_conf.require_full_barrier = true; + // Extract the depth as palette index + m_conf.ps.depth_fmt = 1; + m_conf.ps.channel = ChannelFetch_BLUE; + m_conf.tex = ds->m_texture; + + // We need the palette to convert the depth to the correct alpha value. + if (!tex->m_palette) + { + const u16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal; + g_texture_cache->AttachPaletteToSource(tex, pal, true); + m_conf.pal = tex->m_palette; + } + } + } + + // Hazard handling (i.e. reading from the current RT/DS). 
+ GSTextureCache::SourceRegion source_region = tex->GetRegion(); + bool target_region = (tex->IsFromTarget() && source_region.HasEither()); + GSVector2i unscaled_size = target_region ? tex->GetRegionSize() : tex->GetUnscaledSize(); + float scale = tex->GetScale(); + HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy); + // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; @@ -3510,20 +3575,21 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) // Redundant clamp tests are restricted to local memory/1x sources only, if we're from a target, // we keep the shader clamp. See #5851 on github, and the note in Draw(). [[maybe_unused]] static constexpr const char* clamp_modes[] = {"REPEAT", "CLAMP", "REGION_CLAMP", "REGION_REPEAT"}; - const bool redundant_wms = !tex->m_target && IsRedundantClamp(m_context->CLAMP.WMS, m_context->CLAMP.MINU, + const bool redundant_wms = IsRedundantClamp(m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, tex->m_TEX0.TW); - const bool redundant_wmt = !tex->m_target && IsRedundantClamp(m_context->CLAMP.WMT, m_context->CLAMP.MINV, + const bool redundant_wmt = IsRedundantClamp(m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, tex->m_TEX0.TH); - const u8 wms = EffectiveClamp(m_context->CLAMP.WMS, tex->m_region.HasX() || redundant_wms); - const u8 wmt = EffectiveClamp(m_context->CLAMP.WMT, tex->m_region.HasY() || redundant_wmt); - const bool complex_wms_wmt = !!((wms | wmt) & 2); - GL_CACHE("WMS: %s [%s%s] WMT: %s [%s%s] Complex: %d MINU: %d MAXU: %d MINV: %d MAXV: %d", + const u8 wms = EffectiveClamp(m_context->CLAMP.WMS, !tex->m_target && (source_region.HasX() || redundant_wms)); + const u8 wmt = 
EffectiveClamp(m_context->CLAMP.WMT, !tex->m_target && (source_region.HasY() || redundant_wmt)); + const bool complex_wms_wmt = !!((wms | wmt) & 2) || target_region; + GL_CACHE("WMS: %s [%s%s] WMT: %s [%s%s] Complex: %d TargetRegion: %d MINU: %d MAXU: %d MINV: %d MAXV: %d", clamp_modes[m_context->CLAMP.WMS], redundant_wms ? "redundant," : "", clamp_modes[wms], clamp_modes[m_context->CLAMP.WMT], redundant_wmt ? "redundant," : "", clamp_modes[wmt], - complex_wms_wmt, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, m_context->CLAMP.MINV, m_context->CLAMP.MAXV); + complex_wms_wmt, target_region, + m_context->CLAMP.MINU, m_context->CLAMP.MAXU, m_context->CLAMP.MINV, m_context->CLAMP.MAXV); const bool need_mipmap = IsMipMapDraw(); - const bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt || psm.depth; + const bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt || psm.depth || target_region; const bool trilinear_manual = need_mipmap && GSConfig.HWMipmap == HWMipmapLevel::Full; bool bilinear = m_vt.IsLinear(); @@ -3559,8 +3625,8 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) } // 1 and 0 are equivalent - m_conf.ps.wms = (wms & 2) ? wms : 0; - m_conf.ps.wmt = (wmt & 2) ? wmt : 0; + m_conf.ps.wms = (wms & 2 || target_region) ? wms : 0; + m_conf.ps.wmt = (wmt & 2 || target_region) ? 
wmt : 0; // Depth + bilinear filtering isn't done yet (And I'm not sure we need it anyway but a game will prove me wrong) // So of course, GTA set the linear mode, but sampling is done at texel center so it is equivalent to nearest sampling @@ -3681,10 +3747,7 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) m_conf.ps.tcc = m_context->TEX0.TCC; m_conf.ps.ltf = bilinear && shader_emulated_sampler; - m_conf.ps.point_sampler = g_gs_device->Features().broken_point_sampler && (!bilinear || shader_emulated_sampler); - - const float scale = tex->GetScale(); - const GSVector2i unscaled_size = tex->GetUnscaledSize(); + m_conf.ps.point_sampler = g_gs_device->Features().broken_point_sampler && !target_region && (!bilinear || shader_emulated_sampler); const int tw = static_cast(1 << m_context->TEX0.TW); const int th = static_cast(1 << m_context->TEX0.TH); @@ -3697,17 +3760,27 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) m_conf.cb_ps.STScale = GSVector2(static_cast(miptw) / static_cast(unscaled_size.x), static_cast(mipth) / static_cast(unscaled_size.y)); - if (tex->m_region.HasX()) + if (target_region) { - m_conf.cb_ps.STRange.x = static_cast(tex->m_region.GetMinX()) / static_cast(miptw); - m_conf.cb_ps.STRange.z = static_cast(miptw) / static_cast(tex->m_region.GetWidth()); - m_conf.ps.adjs = 1; + // Use texelFetch() and clamp. Subtract one because the upper bound is exclusive. + m_conf.cb_ps.STRange = GSVector4(tex->GetRegionRect() - GSVector4i::cxpr(0, 0, 1, 1)) * GSVector4(scale); + m_conf.ps.region_rect = true; } - if (tex->m_region.HasY()) + else if (!tex->m_target) { - m_conf.cb_ps.STRange.y = static_cast(tex->m_region.GetMinY()) / static_cast(mipth); - m_conf.cb_ps.STRange.w = static_cast(mipth) / static_cast(tex->m_region.GetHeight()); - m_conf.ps.adjt = 1; + // Targets aren't currently offset, so STScale takes care of it. 
+ if (source_region.HasX()) + { + m_conf.cb_ps.STRange.x = static_cast<float>(source_region.GetMinX()) / static_cast<float>(miptw); + m_conf.cb_ps.STRange.z = static_cast<float>(miptw) / static_cast<float>(source_region.GetWidth()); + m_conf.ps.adjs = 1; + } + if (source_region.HasY()) + { + m_conf.cb_ps.STRange.y = static_cast<float>(source_region.GetMinY()) / static_cast<float>(mipth); + m_conf.cb_ps.STRange.w = static_cast<float>(mipth) / static_cast<float>(source_region.GetHeight()); + m_conf.ps.adjt = 1; + } } m_conf.ps.fst = !!PRIM->FST; @@ -3740,7 +3813,7 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) } else if (trilinear_auto) { - tex->m_texture->GenerateMipmapsIfNeeded(); + m_conf.tex->GenerateMipmapsIfNeeded(); } // TC Offset Hack @@ -3750,8 +3823,8 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) m_conf.cb_vs.texture_scale = GSVector2(tc_oh_ts.x, tc_oh_ts.y); // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader - m_conf.sampler.tau = (wms == CLAMP_REPEAT); - m_conf.sampler.tav = (wmt == CLAMP_REPEAT); + m_conf.sampler.tau = (wms == CLAMP_REPEAT && !target_region); + m_conf.sampler.tav = (wmt == CLAMP_REPEAT && !target_region); if (shader_emulated_sampler) { m_conf.sampler.biln = 0; @@ -3779,46 +3852,230 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) // clamp to base level if we're not providing or generating mipmaps // manual trilinear causes the chain to be uploaded, auto causes it to be generated m_conf.sampler.lodclamp = !(trilinear_manual || trilinear_auto); +} - // don't overwrite the texture when using channel shuffle, but keep the palette - if (!m_channel_shuffle) - m_conf.tex = tex->m_texture; - m_conf.pal = tex->m_palette; - +__ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds, + const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, + bool& target_region,
GSVector2i& unscaled_size, float& scale, GSTexture*& src_copy) +{ // Detect framebuffer read that will need special handling - if (m_tex_is_fb) + const GSTextureCache::Target* src_target = nullptr; + if (m_conf.tex == m_conf.rt) { - if (m_conf.tex == m_conf.rt) + // Can we read the framebuffer directly? (i.e. sample location matches up). + if (CanUseTexIsFB(rt, tex)) { - // This pattern is used by several games to emulate a stencil (shadow) - // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 - // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 - GL_DBG("Source and Target are the same! Let's sample the framebuffer"); m_conf.tex = nullptr; m_conf.ps.tex_is_fb = true; if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) m_conf.require_one_barrier = true; else m_conf.require_full_barrier = true; + + unscaled_size = rt->GetUnscaledSize(); + scale = rt->GetScale(); + return; } - else if (m_conf.tex == m_conf.ds) + + GL_CACHE("Source is render target, taking copy."); + src_target = rt; + } + else if (m_conf.tex == m_conf.ds) + { + // GL, Vulkan (in General layout), not DirectX! + const bool can_read_current_depth_buffer = false; + + // If this is our current Z buffer, we might not be able to read it directly if it's being written to. + // Rather than leaving the backend to do it, we'll check it here. + if (can_read_current_depth_buffer && (m_context->ZBUF.ZMSK || m_context->TEST.ZTST == ZTST_NEVER)) { - // if depth testing is disabled, we don't need to copy, and can just unbind the depth buffer - // no need for a barrier for GL either, since it's not bound to depth and texture concurrently - // otherwise, the backend should recognise the hazard, and copy the buffer (D3D/Vulkan). - if (m_conf.depth.ztst == ZTST_ALWAYS) - { - m_conf.ds = nullptr; - m_tex_is_fb = false; - } + // Safe to read! 
+ GL_CACHE("Source is depth buffer, not writing, safe to read."); + unscaled_size = ds->GetUnscaledSize(); + scale = ds->GetScale(); + return; + } + + // Can't safely read the depth buffer, so we need to take a copy of it. + GL_CACHE("Source is depth buffer, unsafe to read, taking copy."); + src_target = ds; + } + else + { + // No match. + return; + } + + // We need to copy. Try to cut down the source range as much as possible so we don't copy texels we're not reading. + const GSVector2i& src_unscaled_size = src_target->GetUnscaledSize(); + const GSVector4i src_bounds = src_target->GetUnscaledRect(); + GSVector4i copy_range; + GSVector2i copy_size; + GSVector2i copy_dst_offset; + + // Shuffles take the whole target. This should've already been halved. + // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. + // Restricting it also breaks Tom and Jerry... + if (m_channel_shuffle || tex->m_texture->GetType() == GSTexture::Type::DepthStencil) + { + copy_range = src_bounds; + copy_size = src_unscaled_size; + GSVector4i::storel(&copy_dst_offset, copy_range); + } + else + { + // If we're using TW/TH-based sizing, take the size from TEX0, not the target. + const GSVector2i tex_size = GSVector2i(1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + copy_size.x = std::min(tex_size.x, src_unscaled_size.x); + copy_size.y = std::min(tex_size.y, src_unscaled_size.y); + + // Use the texture min/max to get the copy range. + copy_range = tmm.coverage; + + // Texture size above might be invalid (Timesplitters 2), extend if needed. + if (m_context->CLAMP.WMS >= CLAMP_REGION_CLAMP && copy_range.z > copy_size.x) + copy_size.x = src_unscaled_size.x; + if (m_context->CLAMP.WMT >= CLAMP_REGION_CLAMP && copy_range.w > copy_size.y) + copy_size.y = src_unscaled_size.y; + + // Texture shuffles might read up to +/- 8 pixels on either side.
+ if (m_texture_shuffle) + copy_range = (copy_range + GSVector4i::cxpr(-8, 0, 8, 0)).max_i32(GSVector4i::zero()); + + // Apply target region offset. + // TODO: Shrink the output texture to only the copy size. + // Currently there's precision issues when using point sampling with normalized coordinates. + // Once we move those over to texelFetch(), we should be able to shrink the size of the copy textures. + if (target_region) + { + // Create a new texture using only the carved out region. Might save a bit of GPU time if we're lucky. + const GSVector4i src_offset = GSVector4i(source_region.GetMinX(), source_region.GetMinY()).xyxy(); + copy_range += src_offset; + copy_range = copy_range.rintersect(source_region.GetRect(src_unscaled_size.x, src_unscaled_size.y)); + GL_CACHE("Applying target region at copy: %dx%d @ %d,%d => %d,%d", copy_range.width(), copy_range.height(), + tmm.coverage.x, tmm.coverage.y, copy_range.x, copy_range.y); + + // Remove target region flag, we don't need to offset the coordinates anymore. + source_region = {}; + target_region = false; + + // Make sure it's not out of the source's bounds. + copy_range = copy_range.rintersect(src_bounds); + + // Unapply the region offset for the destination coordinates. + const GSVector4i dst_range = copy_range - src_offset; + GSVector4i::storel(&copy_dst_offset, dst_range); + + // We shouldn't need a larger texture because of the TS2 check above, but just in case. + GSVector4i::storel(&copy_size, GSVector4i(copy_size).max_i32(dst_range.zwzw())); + } else { - // weird... we detected a fb read, but didn't end up using it? - DevCon.WriteLn("Tex-is-FB set but not used?"); - m_tex_is_fb = false; + // TODO: We also could use source region here to offset the coordinates. + copy_range = copy_range.rintersect(src_bounds); + GSVector4i::storel(&copy_dst_offset, copy_range); } } + + if (copy_range.rempty()) + { + // Reading outside of the RT range.
+ GL_CACHE("ERROR: Reading outside of the RT range, using null texture."); + unscaled_size = GSVector2i(1, 1); + scale = 1.0f; + m_conf.tex = nullptr; + m_conf.ps.tfx = 4; + return; + } + + unscaled_size = copy_size; + scale = src_target->GetScale(); + GL_CACHE("Copy size: %dx%d, range: %d,%d -> %d,%d (%dx%d) @ %.1f", copy_size.x, copy_size.y, copy_range.x, + copy_range.y, copy_range.z, copy_range.w, copy_range.width(), copy_range.height(), scale); + + const GSVector2i scaled_copy_size = GSVector2i(static_cast<int>(std::ceil(static_cast<float>(copy_size.x) * scale)), + static_cast<int>(std::ceil(static_cast<float>(copy_size.y) * scale))); + const GSVector4i scaled_copy_range = GSVector4i((GSVector4(copy_range) * GSVector4(scale)).ceil()); + const GSVector2i scaled_copy_dst_offset = + GSVector2i(static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.x) * scale)), + static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale))); + + src_copy = src_target->m_texture->IsDepthStencil() ? + g_gs_device->CreateDepthStencil( + scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + g_gs_device->CreateTexture( + scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true); + g_gs_device->CopyRect( + src_target->m_texture, src_copy, scaled_copy_range, scaled_copy_dst_offset.x, scaled_copy_dst_offset.y); + m_conf.tex = src_copy; +} + +bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextureCache::Source* tex) const +{ + // Minimum blending or no barriers -> we can't use tex-is-fb. + if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier) + { + GL_CACHE("Can't use tex-is-fb due to no barriers."); + return false; + } + + // If we're a shuffle, tex-is-fb is always fine. + if (m_texture_shuffle || m_channel_shuffle) + { + GL_CACHE("Activating tex-is-fb for %s shuffle.", m_texture_shuffle ? "texture" : "channel"); + return true; + } + + // Texture is actually the frame buffer.
Stencil emulation to compute shadow (Jak series/tri-ace game) + // Will hit the "m_ps_sel.tex_is_fb = 1" path in the draw + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + // This pattern is used by several games to emulate a stencil (shadow) + // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 + // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 + if (m_context->FRAME.FBMSK == 0x00FFFFFF) + { + GL_CACHE("Tex-is-fb hack for Jak"); + return true; + } + + GL_CACHE("Triangle draw, not using tex-is-fb"); + return false; + } + else if (m_vt.m_primclass == GS_SPRITE_CLASS) + { + // No bilinear for tex-is-fb. + if (m_vt.IsLinear()) + { + GL_CACHE("Can't use tex-is-fb due to bilinear sampling."); + return false; + } + + // Can't do tex-is-fb if paletted and we're not a shuffle (C32 -> P8). + // This one shouldn't happen anymore, because all conversion should be done already. + const GSLocalMemory::psm_t& tex_psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; + const GSLocalMemory::psm_t& rt_psm = GSLocalMemory::m_psm[rt->m_TEX0.PSM]; + if (tex_psm.pal > 0 && tex_psm.bpp < rt_psm.bpp) + { + Console.Error("Draw %d: Can't use tex-is-fb due to palette conversion", s_n); + return true; + } + + // Make sure that we're not sampling away from the area we're rendering. + // We need to take the absolute here, because Beyond Good and Evil undithers itself using a -1,-1 offset. 
+ const GSVector4 diff(m_vt.m_min.p.xyxy(m_vt.m_max.p) - m_vt.m_min.t.xyxy(m_vt.m_max.t)); + GL_CACHE("Coord diff: %f,%f", diff.x, diff.y); + if ((diff.abs() < GSVector4(1.0f)).alltrue()) + { + GL_CACHE("Sampling from rendered texel, using tex-is-fb."); + return true; + } + + GL_CACHE("Coord diff too large, not using tex-is-fb."); + return false; + } + + return false; } void GSRendererHW::EmulateATST(float& AREF, GSHWDrawConfig::PSSelector& ps, bool pass_2) @@ -3873,7 +4130,7 @@ void GSRendererHW::ResetStates() memset(&m_conf, 0, reinterpret_cast(&m_conf.cb_vs) - reinterpret_cast(&m_conf)); } -void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex) +__ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex, const TextureMinMaxResult& tmm) { #ifdef ENABLE_OGL_DEBUG const GSVector4i area_out = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in)); @@ -3909,7 +4166,8 @@ void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* // Warning it must be done at the begining because it will change the // vertex list (it will interact with PrimitiveOverlap and accurate // blending) - EmulateChannelShuffle(tex); + if (m_channel_shuffle && tex && tex->m_from_target) + EmulateChannelShuffle(tex->m_from_target, false); // Upscaling hack to avoid various line/grid issues MergeSprite(tex); @@ -4190,49 +4448,16 @@ void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* m_conf.alpha_second_pass.ps_aref = aref; } + GSTexture* tex_copy = nullptr; if (tex) { - EmulateTextureSampler(tex); + EmulateTextureSampler(rt, ds, tex, tmm, tex_copy); } else { m_conf.ps.tfx = 4; } - if (m_game.title == CRC::ICO) - { - const GSVertex* v = &m_vertex.buff[0]; - const GSVideoMode mode = GetVideoMode(); - if (tex && m_vt.m_primclass == GS_SPRITE_CLASS && m_vertex.next == 2 && PRIM->ABE && // Blend 
texture - ((v[1].U == 8200 && v[1].V == 7176 && mode == GSVideoMode::NTSC) || // at display resolution 512x448 - (v[1].U == 8200 && v[1].V == 8200 && mode == GSVideoMode::PAL)) && // at display resolution 512x512 - tex->m_TEX0.PSM == PSM_PSMT8H) // i.e. read the alpha channel of a 32 bits texture - { - // Note potentially we can limit to TBP0:0x2800 - - // Depth buffer was moved so GS will invalide it which means a - // downscale. ICO uses the MSB depth bits as the texture alpha - // channel. However this depth of field effect requires - // texel:pixel mapping accuracy. - // - // Use an HLE shader to sample depth directly as the alpha channel - GL_INS("ICO sample depth as alpha"); - m_conf.require_full_barrier = true; - // Extract the depth as palette index - m_conf.ps.depth_fmt = 1; - m_conf.ps.channel = ChannelFetch_BLUE; - m_conf.tex = ds->m_texture; - - // We need the palette to convert the depth to the correct alpha value. - if (!tex->m_palette) - { - const u16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal; - m_tc->AttachPaletteToSource(tex, pal, true); - m_conf.pal = tex->m_palette; - } - } - } - if (features.framebuffer_fetch) { // Intel GPUs on Metal lock up if you try to use DSB and framebuffer fetch at once @@ -4377,6 +4602,8 @@ void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* g_gs_device->RenderHW(m_conf); + if (tex_copy) + g_gs_device->Recycle(tex_copy); if (temp_ds) g_gs_device->Recycle(temp_ds); } @@ -4403,7 +4630,7 @@ bool GSRendererHW::HasEEUpload(GSVector4i r) sok.elems[1].psm = m_context->TEX0.PSM; sok.elems[1].rect = r; - rect = m_tc->ComputeSurfaceOffset(sok).b2a_offset; + rect = g_texture_cache->ComputeSurfaceOffset(sok).b2a_offset; } if (rect.rintersect(r).eq(r)) return true; @@ -4491,7 +4718,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() if (PRIM->TME) { // If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need. 
- const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage; + const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear(), false).coverage; // If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download. if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled) @@ -4499,7 +4726,7 @@ GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw() if (HasEEUpload(r)) return CLUTDrawTestResult::CLUTDrawOnCPU; - GSTextureCache::Target* tgt = m_tc->GetExactTarget(m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM); + GSTextureCache::Target* tgt = g_texture_cache->GetExactTarget(m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM); if (tgt) { bool is_dirty = false; @@ -4596,13 +4823,13 @@ bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_t // Make sure this isn't something we've actually rendered to (e.g. a texture shuffle). if (PRIM->TME) { - GSTextureCache::Target* src_target = m_tc->GetTargetWithSharedBits(m_context->TEX0.TBP0, m_context->TEX0.PSM); + GSTextureCache::Target* src_target = g_texture_cache->GetTargetWithSharedBits(m_context->TEX0.TBP0, m_context->TEX0.PSM); if (src_target) { // If the EE has written over our sample area, we're fine to do this on the CPU, despite the target. if (!src_target->m_dirty.empty()) { - const GSVector4i tr(GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage); + const GSVector4i tr(GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear(), false).coverage); for (GSDirtyRect& rc : src_target->m_dirty) { if (!rc.GetDirtyRect(m_context->TEX0).rintersect(tr).rempty()) @@ -4830,9 +5057,9 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc // Copy back the texture into the GS mem. 
I don't know why but it will be // reuploaded again later - m_tc->Read(tex, r_texture.rintersect(tex->m_texture->GetRect())); + g_texture_cache->Read(tex, r_texture.rintersect(tex->m_texture->GetRect())); - m_tc->InvalidateVideoMemSubTarget(_rt); + g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 3985949ce0..f213c733e3 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -84,14 +84,21 @@ private: template void RoundSpriteOffset(); - void DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex); + void DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Target* ds, GSTextureCache::Source* tex, const TextureMinMaxResult& tmm); void ResetStates(); void SetupIA(float target_scale, float sx, float sy); void EmulateTextureShuffleAndFbmask(); - void EmulateChannelShuffle(const GSTextureCache::Source* tex); + bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); void EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass); - void EmulateTextureSampler(const GSTextureCache::Source* tex); + + void EmulateTextureSampler(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds, + GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTexture*& src_copy); + void HandleTextureHazards(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds, + const GSTextureCache::Source* tex, const TextureMinMaxResult& tmm, GSTextureCache::SourceRegion& source_region, + bool& target_region, GSVector2i& unscaled_size, float& scale, GSTexture*& src_copy); + bool CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextureCache::Source* tex) const; + void EmulateZbuffer(); void EmulateATST(float& AREF, GSHWDrawConfig::PSSelector& ps, bool pass_2); @@ -101,9 +108,7 @@ private: bool IsSplitTextureShuffle(); 
GSVector4i GetSplitTextureShuffleDrawRect() const; - GSTextureCache* m_tc; GSVector4i m_r = {}; - GSTextureCache::Source* m_src = nullptr; // CRC Hacks bool IsBadFrame(); @@ -120,7 +125,6 @@ private: u32 m_last_channel_shuffle_fbmsk = 0; bool m_channel_shuffle = false; - bool m_tex_is_fb = false; bool m_userhacks_tcoffset = false; float m_userhacks_tcoffset_x = 0.0f; float m_userhacks_tcoffset_y = 0.0f; @@ -143,7 +147,6 @@ public: virtual ~GSRendererHW() override; __fi static GSRendererHW* GetInstance() { return static_cast(g_gs_renderer.get()); } - __fi GSTextureCache* GetTextureCache() const { return m_tc; } void Destroy() override; @@ -177,9 +180,6 @@ public: void ReadbackTextureCache() override; GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, float* scale, const GSVector2i& size) override; - // Called by the texture cache to know if current texture is useful - bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0); - - // Called by the texture cache when optimizing the copy range for sources - bool IsPossibleTextureShuffle(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) const; + /// Called by the texture cache to know for certain whether there is a channel shuffle. 
+ bool TestChannelShuffle(GSTextureCache::Target* src); }; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp index 2bc99270ae..4afe124252 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHWMultiISA.cpp @@ -199,7 +199,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc) GIFRegTEX0 TEX0 = context->GetSizeFixedTEX0(vt.m_min.t.xyxy(vt.m_max.t), vt.IsLinear(), mipmap); - const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage; + const GSVector4i r = hw.GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; if (!hw.m_sw_texture[0]) hw.m_sw_texture[0] = std::make_unique(0, TEX0, env.TEXA); @@ -300,7 +300,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc) else hw.m_sw_texture[i]->Reset(gd.sel.tw + 3, MIP_TEX0, env.TEXA); - GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf).coverage; + GSVector4i r = hw.GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; hw.m_sw_texture[i]->Update(r); gd.tex[i] = hw.m_sw_texture[i]->m_buff; } @@ -554,7 +554,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc) static_cast(hw.m_sw_rasterizer.get())->Draw(data); if (invalidate_tc) - hw.m_tc->InvalidateVideoMem(context->offset.fb, bbox); + g_texture_cache->InvalidateVideoMem(context->offset.fb, bbox); return true; } \ No newline at end of file diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index c11bc9c60f..3732f74d4d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -31,6 +31,8 @@ #include #endif +std::unique_ptr g_texture_cache; + static u8* s_unswizzle_buffer; GSTextureCache::GSTextureCache() @@ -479,116 +481,6 @@ void GSTextureCache::DirtyRectByPage(u32 sbp, u32 spsm, u32 sbw, Target* t, GSVe AddDirtyRectTarget(t, 
new_rect, t->m_TEX0.PSM, t->m_TEX0.TBW, rgba); } -GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette) -{ - if (GSConfig.UserHacks_DisableDepthSupport) - { - GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); - throw GSRecoverableError(); - } - - const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; - - Source* src = NULL; - Target* dst = NULL; - - // Check only current frame, I guess it is only used as a postprocessing effect - const u32 bp = TEX0.TBP0; - const u32 psm = TEX0.PSM; - - for (auto t : m_dst[DepthStencil]) - { - if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - { - ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); - if (t->m_age == 0) - { - // Perfect Match - dst = t; - break; - } - else if (t->m_age == 1) - { - // Better than nothing (Full Spectrum Warrior) - dst = t; - } - } - } - - if (!dst) - { - // Retry on the render target (Silent Hill 4) - for (auto t : m_dst[RenderTarget]) - { - // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? - if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) - { - ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); - dst = t; - break; - } - } - } - - if (dst) - { - GL_CACHE("TC depth: dst %s hit: %d (0x%x, %s)", to_string(dst->m_type), - dst->m_texture ? 
dst->m_texture->GetID() : 0, - TEX0.TBP0, psm_str(psm)); - - // Create a shared texture source - src = new Source(TEX0, TEXA); - src->m_texture = dst->m_texture; - src->m_scale = dst->m_scale; - src->m_unscaled_size = dst->m_unscaled_size; - src->m_shared_texture = true; - src->m_target = true; // So renderer can check if a conversion is required - src->m_from_target = &dst->m_texture; // avoid complex condition on the renderer - src->m_from_target_TEX0 = dst->m_TEX0; - src->m_32_bits_fmt = dst->m_32_bits_fmt; - src->m_valid_rect = dst->m_valid; - src->m_end_block = dst->m_end_block; - - // Insert the texture in the hash set to keep track of it. But don't bother with - // texture cache list. It means that a new Source is created everytime we need it. - // If it is too expensive, one could cut memory allocation in Source constructor for this - // use case. - if (palette) - { - AttachPaletteToSource(src, psm_s.pal, true); - } - - m_src.m_surfaces.insert(src); - } - else if (g_gs_renderer->m_game.title == CRC::SVCChaos || g_gs_renderer->m_game.title == CRC::KOF2002) - { - // SVCChaos black screen & KOF2002 blue screen on main menu, regardless of depth enabled or disabled. - return LookupSource(TEX0, TEXA, CLAMP, r, nullptr); - } - else - { - GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm)); - // Possible ? In this case we could call LookupSource - // Or just put a basic texture - // src->m_texture = g_gs_device->CreateTexture(tw, th); - // In all cases rendering will be broken - // - // Note: might worth to check previous frame - // Note: otherwise return NULL and skip the draw - - // Full Spectrum Warrior: first draw call of cut-scene rendering - // The game tries to emulate a texture shuffle with an old depth buffer - // (don't exists yet for us due to the cache) - // Rendering is nicer (less garbage) if we skip the draw call. - throw GSRecoverableError(); - } - - ASSERT(src->m_texture); - ASSERT(src->m_scale == (dst ? 
dst->m_scale : 1.0f)); - - return src; -} - __ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSLocalMemory::psm_t& psm_s, const u32* clut, const GSTexture* gpu_clut, const GSVector2i& compare_lod, const GSTextureCache::SourceRegion& region, u32 fixed_tex0, FastList& map) @@ -642,6 +534,130 @@ __ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, cons return nullptr; } +GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette) +{ + if (GSConfig.UserHacks_DisableDepthSupport) + { + GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); + throw GSRecoverableError(); + } + + GL_CACHE("TC: Lookup Depth Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, + r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH); + + const SourceRegion region = SourceRegion::Create(TEX0, CLAMP); + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + Source* src = FindSourceInMap(TEX0, TEXA, psm_s, nullptr, nullptr, GSVector2i(0, 0), region, + region.IsFixedTEX0(TEX0), m_src.m_map[TEX0.TBP0 >> 5]); + if (src) + { + GL_CACHE("TC: src hit: (0x%x, %s)", TEX0.TBP0, psm_str(TEX0.PSM)); + src->Update(r); + return src; + } + + Target* dst = nullptr; + + // Check only current frame, I guess it is only used as a postprocessing effect + const u32 bp = TEX0.TBP0; + const u32 psm = TEX0.PSM; + + for (auto t : m_dst[DepthStencil]) + { + if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); + if (t->m_age == 0) + { + // Perfect Match + dst = t; + break; + } + else if (t->m_age == 1) + { + // Better than nothing (Full Spectrum Warrior) + dst = t; + } + } + } + + if (!dst) + { + // Retry on the render target 
(Silent Hill 4) + for (auto t : m_dst[RenderTarget]) + { + // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? + if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth); + dst = t; + break; + } + } + } + + if (dst) + { + GL_CACHE("TC depth: dst %s hit: (0x%x, %s)", to_string(dst->m_type), + TEX0.TBP0, psm_str(psm)); + + // Create a shared texture source + src = new Source(TEX0, TEXA); + src->m_texture = dst->m_texture; + src->m_scale = dst->m_scale; + src->m_unscaled_size = dst->m_unscaled_size; + src->m_shared_texture = true; + src->m_target = true; // So renderer can check if a conversion is required + src->m_from_target = dst; // avoid complex condition on the renderer + src->m_from_target_TEX0 = dst->m_TEX0; + src->m_32_bits_fmt = dst->m_32_bits_fmt; + src->m_valid_rect = dst->m_valid; + src->m_end_block = dst->m_end_block; + + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + { + m_temporary_source = src; + } + else + { + src->SetPages(); + m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex); + } + + if (palette) + { + AttachPaletteToSource(src, psm_s.pal, true); + } + } + else if (g_gs_renderer->m_game.title == CRC::SVCChaos || g_gs_renderer->m_game.title == CRC::KOF2002) + { + // SVCChaos black screen & KOF2002 blue screen on main menu, regardless of depth enabled or disabled. + return LookupSource(TEX0, TEXA, CLAMP, r, nullptr); + } + else + { + GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm)); + // Possible ? 
In this case we could call LookupSource + // Or just put a basic texture + // src->m_texture = g_gs_device->CreateTexture(tw, th); + // In all cases rendering will be broken + // + // Note: might worth to check previous frame + // Note: otherwise return NULL and skip the draw + + // Full Spectrum Warrior: first draw call of cut-scene rendering + // The game tries to emulate a texture shuffle with an old depth buffer + // (don't exists yet for us due to the cache) + // Rendering is nicer (less garbage) if we skip the draw call. + throw GSRecoverableError(); + } + + ASSERT(src->m_texture); + ASSERT(src->m_scale == (dst ? dst->m_scale : 1.0f)); + + return src; +} + GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod) { GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH); @@ -652,48 +668,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con const u32* const clut = g_gs_renderer->m_mem.m_clut; GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr; - SourceRegion region = {}; - if (CLAMP.WMS == CLAMP_REGION_CLAMP && CLAMP.MAXU >= CLAMP.MINU) - { - // Another Lupin case here, it uses region clamp with UV (not ST), puts a clamp region further - // into the texture, but a smaller TW/TH. Catch this by looking for a clamp range above TW. 
- const u32 rw = CLAMP.MAXU - CLAMP.MINU + 1; - if (rw < (1u << TEX0.TW) || CLAMP.MAXU >= (1u << TEX0.TW)) - { - region.SetX(CLAMP.MINU, CLAMP.MAXU + 1); - GL_CACHE("TC: Region clamp optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); - } - } - else if (CLAMP.WMS == CLAMP_REGION_REPEAT && CLAMP.MINU != 0) - { - // Lupin the 3rd is really evil, it sets TW/TH to the texture size, but then uses region repeat - // to offset the actual texture data to elsewhere. So, we'll just force any cases like this down - // the region texture path. - const u32 rw = ((CLAMP.MINU | CLAMP.MAXU) - CLAMP.MAXU) + 1; - if (rw < (1u << TEX0.TW) || (CLAMP.MAXU != 0 && (rw <= (1u << TEX0.TW)))) - { - region.SetX(CLAMP.MAXU, (CLAMP.MINU | CLAMP.MAXU) + 1); - GL_CACHE("TC: Region repeat optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); - } - } - if (CLAMP.WMT == CLAMP_REGION_CLAMP && CLAMP.MAXV >= CLAMP.MINV) - { - const u32 rh = CLAMP.MAXV - CLAMP.MINV + 1; - if (rh < (1u << TEX0.TH) || CLAMP.MAXV >= (1u << TEX0.TH)) - { - region.SetY(CLAMP.MINV, CLAMP.MAXV + 1); - GL_CACHE("TC: Region clamp optimization: %d height -> %d", 1 << TEX0.TW, region.GetHeight()); - } - } - else if (CLAMP.WMT == CLAMP_REGION_REPEAT && CLAMP.MINV != 0) - { - const u32 rh = ((CLAMP.MINV | CLAMP.MAXV) - CLAMP.MAXV) + 1; - if (rh < (1u << TEX0.TH) || (CLAMP.MAXV != 0 && (rh <= (1u << TEX0.TH)))) - { - region.SetY(CLAMP.MAXV, (CLAMP.MINV | CLAMP.MAXV) + 1); - GL_CACHE("TC: Region repeat optimization: %d height -> %d", 1 << TEX0.TW, region.GetHeight()); - } - } + const SourceRegion region = SourceRegion::Create(TEX0, CLAMP); // Prevent everything going to rubbish if a game somehow sends a TW/TH above 10, and region isn't being used. 
if ((TEX0.TW > 10 && !region.HasX()) || (TEX0.TH > 10 && !region.HasY())) @@ -1076,12 +1051,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con #ifdef ENABLE_OGL_DEBUG if (dst) { - GL_CACHE("TC: dst %s hit (%s, OFF <%d,%d>): %d (0x%x, %s)", + GL_CACHE("TC: dst %s hit (%s, OFF <%d,%d>): (0x%x, %s)", to_string(dst->m_type), half_right ? "half" : "full", x_offset, y_offset, - dst->m_texture ? dst->m_texture->GetID() : 0, TEX0.TBP0, psm_str(TEX0.PSM)); } @@ -1094,8 +1068,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con } else { - GL_CACHE("TC: src hit: %d (0x%x, 0x%x, %s)", - src->m_texture ? src->m_texture->GetID() : 0, + GL_CACHE("TC: src hit: (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); @@ -1125,7 +1098,7 @@ GSTextureCache::Target* GSTextureCache::FindTargetOverlap(u32 bp, u32 end_block, return nullptr; } -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask, const bool is_frame, bool preload, bool is_clear) +GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask, const bool is_frame, bool preload, bool is_clear) { const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; const u32 bp = TEX0.TBP0; @@ -1188,7 +1161,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con } dst = t; - GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", size.x, size.y, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); + GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: (0x%x -> 0x%x %s)", size.x, size.y, bp, t->m_end_block, psm_str(TEX0.PSM)); if (size.x > 0 || size.y > 0) ScaleTargetForDisplay(dst, TEX0, size.x, size.y); @@ -1210,7 +1183,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con continue; dst = 
t; - GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", size.x, size.y, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM)); + GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: (0x%x, took 0x%x -> 0x%x %s)", size.x, size.y, bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM)); if (size.x > 0 || size.y > 0) ScaleTargetForDisplay(dst, TEX0, size.x, size.y); @@ -1233,7 +1206,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con if (bp == t->m_TEX0.TBP0) { dst = t; - GL_CACHE("TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s)", size.x, size.y, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM)); + GL_CACHE("TC: Lookup Frame %dx%d, empty hit: (0x%x -> 0x%x %s)", size.x, size.y, bp, t->m_end_block, psm_str(TEX0.PSM)); break; } } @@ -1454,7 +1427,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con return dst; } -GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale) +GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale) { return LookupTarget(TEX0, size, scale, RenderTarget, true, 0, true); } @@ -1576,10 +1549,12 @@ void GSTextureCache::InvalidateVideoMemType(int type, u32 bp) if (bp == t->m_TEX0.TBP0) { - GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, + GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) (0x%x)", to_string(type), t->m_TEX0.TBP0); + // Need to also remove any sources which reference this target. + InvalidateSourcesFromTarget(t); + list.erase(i); delete t; @@ -1631,35 +1606,6 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r } } } - - // Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380 - // Note: the game only does a 0 direct write. 
If some games expect some real data - // we are screwed. - if (g_gs_renderer->m_game.title == CRC::HauntingGround) - { - u32 end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats - auto type = RenderTarget; - - for (auto t : m_dst[type]) - { - if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block) - { - // Haunting ground expect to clean buffer B with a rendering into buffer A. - // Situation is quite messy as it would require to extract the data from the buffer A - // and to move in buffer B. - // - // Of course buffers don't share the same line width. You can't delete the buffer as next - // miss will load invalid data. - // - // So just clear the damn buffer and forget about it. - GL_CACHE("TC: Clear Sub Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, - t->m_TEX0.TBP0); - g_gs_device->ClearRenderTarget(t->m_texture, 0); - t->m_dirty.clear(); - } - } - } } bool found = false; @@ -1768,8 +1714,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r { if (!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM) && bw == std::max(t->m_TEX0.TBW, 1U)) { - GL_CACHE("TC: Dirty Target(%s) %d (0x%x) r(%d,%d,%d,%d)", to_string(type), - t->m_texture ? 
t->m_texture->GetID() : 0, + GL_CACHE("TC: Dirty Target(%s) (0x%x) r(%d,%d,%d,%d)", to_string(type), t->m_TEX0.TBP0, r.x, r.y, r.z, r.w); if (eewrite) @@ -1794,9 +1739,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (bw == std::max(t->m_TEX0.TBW, 1U) && GSLocalMemory::m_psm[psm].bpp == GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) { AddDirtyRectTarget(t, rect, psm, bw, rgba); - GL_CACHE("TC: Direct Dirty in the middle [aggressive] of Target(%s) %d [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", + GL_CACHE("TC: Direct Dirty in the middle [aggressive] of Target(%s) [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, psm_str(t->m_TEX0.PSM), t->m_TEX0.TBP0, t->m_end_block, @@ -1871,9 +1815,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (so.is_valid) { AddDirtyRectTarget(t, so.b2a_offset, psm, bw, rgba); - GL_CACHE("TC: Dirty in the middle [aggressive] of Target(%s) %d [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", + GL_CACHE("TC: Dirty in the middle [aggressive] of Target(%s) [PSM:%s BP:0x%x->0x%x BW:%u rect(%d,%d=>%d,%d)] write[PSM:%s BP:0x%x BW:%u rect(%d,%d=>%d,%d)]", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, psm_str(t->m_TEX0.PSM), t->m_TEX0.TBP0, t->m_end_block, @@ -1910,8 +1853,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r else { i = list.erase(j); - GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? 
t->m_texture->GetID() : 0, + GL_CACHE("TC: Remove Target(%s) (0x%x)", to_string(type), t->m_TEX0.TBP0); delete t; } @@ -1951,8 +1893,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (r.bottom > y) { - GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, + GL_CACHE("TC: Dirty After Target(%s) (0x%x)", to_string(type), t->m_TEX0.TBP0); if (eewrite) @@ -1981,8 +1922,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r { const int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize; - GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, + GL_CACHE("TC: Dirty in the middle of Target(%s) (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type), t->m_TEX0.TBP0, t->m_end_block, r.left, r.top + y, r.right, r.bottom + y, bw); @@ -2005,8 +1945,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r if (t->m_age > 1 && bw > 1 && bw != t->m_TEX0.TBW) { i = list.erase(j); - GL_CACHE("TC: Tex in RT Remove Old Target(%s) %d (0x%x) TPSM %x PSM %x bp 0x%x", to_string(type), - t->m_texture ? t->m_texture->GetID() : 0, + GL_CACHE("TC: Tex in RT Remove Old Target(%s) (0x%x) TPSM %x PSM %x bp 0x%x", to_string(type), t->m_TEX0.TBP0, t->m_TEX0.PSM, psm, @@ -2030,8 +1969,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r else { i = list.erase(j); - GL_CACHE("TC: Tex in RT Remove Target(%s) %d (0x%x) TPSM %x PSM %x bp 0x%x", to_string(type), - t->m_texture ? 
t->m_texture->GetID() : 0, + GL_CACHE("TC: Tex in RT Remove Target(%s) (0x%x) TPSM %x PSM %x bp 0x%x", to_string(type), t->m_TEX0.TBP0, t->m_TEX0.PSM, psm, @@ -2653,10 +2591,10 @@ GSTextureCache::Target* GSTextureCache::GetTargetWithSharedBits(u32 BP, u32 PSM) return nullptr; } -u32 GSTextureCache::GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height) +u32 GSTextureCache::GetTargetHeight(u32 bp, u32 fbw, u32 psm, u32 min_height) { TargetHeightElem search = {}; - search.fbp = fbp; + search.bp = bp; search.fbw = fbw; search.psm = psm; search.height = min_height; @@ -2668,7 +2606,7 @@ u32 GSTextureCache::GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height) { if (elem.height < min_height) { - DbgCon.WriteLn("Expand height at %x %u %u from %u to %u", fbp, fbw, psm, elem.height, min_height); + DbgCon.WriteLn("Expand height at %x %u %u from %u to %u", bp, fbw, psm, elem.height, min_height); elem.height = min_height; } @@ -2678,7 +2616,7 @@ u32 GSTextureCache::GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height) } } - DbgCon.WriteLn("New height at %x %u %u: %u", fbp, fbw, psm, min_height); + DbgCon.WriteLn("New height at %x %u %u: %u", bp, fbw, psm, min_height); m_target_heights.push_front(search); return min_height; } @@ -2732,6 +2670,9 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt) GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x", rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block); + // Need to also remove any sources which reference this target. 
+ InvalidateSourcesFromTarget(t); + i = list.erase(i); delete t; } @@ -2742,6 +2683,42 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt) } } +void GSTextureCache::InvalidateVideoMemTargets(int type, u32 bp, u32 bw, u32 psm, const GSVector4i& r) +{ + auto& list = m_dst[type]; + + for (auto i = list.begin(); i != list.end();) + { + GSTextureCache::Target* t = *i; + auto ei = i++; + + if (t->m_TEX0.TBP0 != bp && t->Overlaps(bp, bw, psm, r)) + { + GL_CACHE("InvalidateVideoMemTargets(%x, %u, %s, %d,%d => %d,%d): Removing target at %x %u %s", bp, bw, + psm_str(psm), r.x, r.y, r.z, r.w, t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + + // Need to also remove any sources which reference this target. + InvalidateSourcesFromTarget(t); + + list.erase(ei); + delete t; + } + } +} + +void GSTextureCache::InvalidateSourcesFromTarget(const Target* t) +{ + for (auto it = m_src.m_surfaces.begin(); it != m_src.m_surfaces.end();) + { + Source* src = *it++; + if (src->m_from_target == t) + { + GL_CACHE("TC: Removing source at %x referencing target", src->m_TEX0.TBP0); + m_src.RemoveAt(src); + } + } +} + void GSTextureCache::IncAge() { const int max_age = m_src.m_used ? 3 : 6; @@ -2752,21 +2729,9 @@ void GSTextureCache::IncAge() { Source* s = *i; - if (s->m_shared_texture) - { - // Shared textures are temporary only added in the hash set but not in the texture - // cache list therefore you can't use RemoveAt - i = m_src.m_surfaces.erase(i); - delete s; - } - else - { - ++i; - if (++s->m_age > (s->CanPreload() ? max_preload_age : max_age)) - { - m_src.RemoveAt(s); - } - } + ++i; + if (++s->m_age > (s->CanPreload() ? max_preload_age : max_age)) + m_src.RemoveAt(s); } const u32 max_hash_cache_age = 30; @@ -2817,8 +2782,7 @@ void GSTextureCache::IncAge() if (++t->m_age > max_rt_age) { i = list.erase(i); - GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type), - t->m_texture ? 
t->m_texture->GetID() : 0, + GL_CACHE("TC: Remove Target(%s): (0x%x) due to age", to_string(type), t->m_TEX0.TBP0); delete t; @@ -2851,6 +2815,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; Source* src = new Source(TEX0, TEXA); + // For debugging, we have an option to force copies instead of sampling the target directly. + static constexpr bool force_target_copy = false; + // Normally we wouldn't use the region with targets, but for the case where we're drawing UVs and the // clamp rectangle exceeds the TW/TH (which is now unused), we do need to use it. Timesplitters 2 does // its frame blending effect using a smaller TW/TH, *and* triangles instead of sprites just to be extra @@ -2880,26 +2847,46 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con const int w = static_cast(std::ceil(scale * tw)); const int h = static_cast(std::ceil(scale * th)); - // if we have a source larger than the target (from tex-in-rt), we need to clear it, otherwise we'll read junk - const bool outside_target = ((x + w) > dst->m_texture->GetWidth() || (y + h) > dst->m_texture->GetHeight()); - GSTexture* sTex = dst->m_texture; - GSTexture* dTex = outside_target ? - g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, true) : - g_gs_device->CreateTexture(w, h, tlevels, GSTexture::Format::Color, true); - m_source_memory_usage += dTex->GetMemUsage(); + // If we have a source larger than the target (from tex-in-rt), texelFetch() for target region will return black. + if constexpr (force_target_copy) + { + // If we have a source larger than the target, we need to clear it, otherwise we'll read junk + const bool outside_target = ((x + w) > dst->m_texture->GetWidth() || (y + h) > dst->m_texture->GetHeight()); + GSTexture* sTex = dst->m_texture; + GSTexture* dTex = outside_target ? 
+ g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, true) : + g_gs_device->CreateTexture(w, h, tlevels, GSTexture::Format::Color, true); + m_source_memory_usage += dTex->GetMemUsage(); - // copy the rt in - const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); - if (!area.rempty()) - g_gs_device->CopyRect(sTex, dTex, area, 0, 0); + // copy the rt in + const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); + if (!area.rempty()) + g_gs_device->CopyRect(sTex, dTex, area, 0, 0); + + src->m_texture = dTex; + src->m_unscaled_size = GSVector2i(tw, th); + } + else + { + GL_CACHE("TC: Sample offset (%d,%d) reduced region directly from target: %dx%d -> %dx%d @ %d,%d", + dst->m_texture->GetWidth(), x_offset, y_offset, dst->m_texture->GetHeight(), w, h, x_offset, y_offset); + + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); + src->m_texture = dst->m_texture; + src->m_unscaled_size = dst->m_unscaled_size; + src->m_shared_texture = true; + } + + // Invalidate immediately on recursive draws, because if we don't here, InvalidateVideoMem() will. + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + m_temporary_source = src; // Keep a trace of origin of the texture - src->m_texture = dTex; src->m_scale = scale; - src->m_unscaled_size = GSVector2i(tw, th); src->m_end_block = dst->m_end_block; src->m_target = true; - src->m_from_target = &dst->m_texture; + src->m_from_target = dst; src->m_from_target_TEX0 = dst->m_TEX0; if (psm.pal > 0) @@ -2908,43 +2895,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con AttachPaletteToSource(src, psm.pal, true); } } - else if (dst && GSRendererHW::GetInstance()->UpdateTexIsFB(dst, TEX0)) - { - // This shortcut is a temporary solution. 
It isn't a good solution - // as it won't work with Channel Shuffle/Texture Shuffle pattern - // (we need texture cache result to detect those effects). - // Instead a better solution would be to defer the copy/StrechRect later - // in the rendering. - // Still this poor solution is enough for a huge speed up in a couple of games - // - // Be aware that you can't use StrechRect between BeginScene/EndScene. - // So it could be tricky to put in the middle of the DrawPrims - - // Keep a trace of origin of the texture - src->m_texture = dst->m_texture; - src->m_scale = dst->m_scale; - src->m_unscaled_size = dst->m_unscaled_size; - src->m_target = true; - src->m_shared_texture = true; - src->m_from_target = &dst->m_texture; - src->m_from_target_TEX0 = dst->m_TEX0; - src->m_end_block = dst->m_end_block; - src->m_32_bits_fmt = dst->m_32_bits_fmt; - - // Even if we sample the framebuffer directly we might need the palette - // to handle the format conversion on GPU - if (psm.pal > 0) - AttachPaletteToSource(src, psm.pal, true); - - // This will get immediately invalidated. - m_temporary_source = src; - } else if (dst) { // TODO: clean up this mess ShaderConvert shader = dst->m_type != RenderTarget ? 
ShaderConvert::FLOAT32_TO_RGBA8 : ShaderConvert::COPY; - const bool is_8bits = TEX0.PSM == PSM_PSMT8; + const bool channel_shuffle = GSRendererHW::GetInstance()->TestChannelShuffle(dst); + const bool is_8bits = TEX0.PSM == PSM_PSMT8 && !channel_shuffle; if (is_8bits) { @@ -2967,7 +2924,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Keep a trace of origin of the texture src->m_target = true; src->m_unscaled_size = GSVector2i(std::min(dst->m_unscaled_size.x, tw), std::min(dst->m_unscaled_size.y, th)); - src->m_from_target = &dst->m_texture; + src->m_from_target = dst; src->m_from_target_TEX0 = dst->m_TEX0; src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; @@ -3076,44 +3033,18 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con DevCon.Error("Invalid half-right copy with width %d from %dx%d texture", half_width * 2, dst->m_unscaled_size.x, dst->m_unscaled_size.y); } } - else if (src_range && dst->m_TEX0.TBW == TEX0.TBW && !is_8bits) - { - // optimization for TBP == FRAME - const GSDrawingContext* const context = g_gs_renderer->m_context; - if (context->FRAME.Block() == TEX0.TBP0 || context->ZBUF.Block() == TEX0.TBP0) - { - // For the TS2 case above, src_range is going to be incorrect, since TW/TH are incorrect. - // We can remove this check once we move it to tex-is-fb instead. - if (!region.IsFixedTEX0(1 << TEX0.TW, 1 << TEX0.TH)) - { - // if it looks like a texture shuffle, we might read up to +/- 8 pixels on either side. 
- GSVector4 adjusted_src_range(*src_range); - if (GSRendererHW::GetInstance()->IsPossibleTextureShuffle(dst, TEX0)) - adjusted_src_range += GSVector4(-8.0f, 0.0f, 8.0f, 0.0f); - - // don't forget to scale the copy range - adjusted_src_range = adjusted_src_range * GSVector4(scale).xyxy(); - sRect = sRect.rintersect(GSVector4i(adjusted_src_range)); - destX = sRect.x; - destY = sRect.y; - } - - // clean up immediately afterwards - m_temporary_source = src; - } - } // Create a cleared RT if we somehow end up with an empty source rect (because the RT isn't large enough). const bool source_rect_empty = sRect.rempty(); const bool use_texture = (shader == ShaderConvert::COPY && !source_rect_empty); + const GSVector2i dst_texture_size = dst->m_texture->GetSize(); // Assuming everything matches up, instead of copying the target, we can just sample it directly. // It's the same as doing the copy first, except we save GPU time. + // TODO: We still need to copy if the TBW is mismatched. Except when TBW <= 1 (Jak 2). 
if (!half_right && // not the size change from above use_texture && // not reinterpreting the RT - new_size == dst->m_texture->GetSize() && // same dimensions - !m_temporary_source // not the shuffle case above - ) + !force_target_copy) { // sample the target directly src->m_texture = dst->m_texture; @@ -3121,14 +3052,30 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; src->m_target = true; // So renderer can check if a conversion is required - src->m_from_target = &dst->m_texture; // avoid complex condition on the renderer + src->m_from_target = dst; // avoid complex condition on the renderer src->m_from_target_TEX0 = dst->m_TEX0; src->m_32_bits_fmt = dst->m_32_bits_fmt; src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; - // kill the source afterwards, since we don't want to have to track changes to the target - m_temporary_source = src; + // if the size doesn't match, we need to engage shader sampling. + if (new_size != dst_texture_size) + { + GL_CACHE("TC: Sample reduced region directly from target: %dx%d -> %dx%d", dst_texture_size.x, + dst_texture_size.y, new_size.x, new_size.y); + + if (new_size.x != dst_texture_size.x) + src->m_region.SetX(0, tw); + if (new_size.y != dst_texture_size.y) + src->m_region.SetY(0, th); + } + + // kill source immediately if it's the RT/DS, because that'll get invalidated immediately + if (GSRendererHW::GetInstance()->IsTBPFrameOrZ(dst->m_TEX0.TBP0)) + { + GL_CACHE("TC: Source is RT or ZBUF, invalidating after draw."); + m_temporary_source = src; + } } else { @@ -3172,14 +3119,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Offset hack. Can be enabled via GS options. // The offset will be used in Draw(). - float modxy = 0.0f; - - if (hack) - { - modxy = g_gs_renderer->GetModXYOffset(); - } - - dst->OffsetHack_modxy = modxy; + dst->OffsetHack_modxy = hack ? 
g_gs_renderer->GetModXYOffset() : 0.0f; } else { @@ -3225,12 +3165,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con ASSERT(src->m_texture); ASSERT(src->m_target == (dst != nullptr)); - ASSERT(src->m_from_target == (dst ? &dst->m_texture : nullptr)); + ASSERT(src->m_from_target == dst); ASSERT(src->m_scale == ((!dst || TEX0.PSM == PSM_PSMT8) ? 1.0f : dst->m_scale)); - src->SetPages(); - - m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex); + if (src != m_temporary_source) + { + src->SetPages(); + m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex); + } return src; } @@ -3813,7 +3755,7 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r) // Yes lots of logging, but I'm not confident with this code GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM); - GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height()); + GL_PERF("TC: Read Back Target: (0x%x)[fmt: 0x%x]. Size %dx%d", TEX0.TBP0, TEX0.PSM, r.width(), r.height()); const GSVector4 src(GSVector4(r) * GSVector4(t->m_scale) / GSVector4(t->m_texture->GetSize()).xyxy()); const GSVector4i drc(0, 0, r.width(), r.height()); @@ -3902,11 +3844,6 @@ GSTextureCache::Surface::Surface() = default; GSTextureCache::Surface::~Surface() = default; -void GSTextureCache::Surface::UpdateAge() -{ - m_age = 0; -} - bool GSTextureCache::Surface::Inside(u32 bp, u32 bw, u32 psm, const GSVector4i& rect) { // Valid only for color formats. @@ -3961,7 +3898,7 @@ GSTextureCache::Source::~Source() // to recycle. 
if (!m_shared_texture && !m_from_hash_cache && m_texture) { - GSRendererHW::GetInstance()->GetTextureCache()->m_source_memory_usage -= m_texture->GetMemUsage(); + g_texture_cache->m_source_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } } @@ -3985,7 +3922,9 @@ void GSTextureCache::Source::SetPages() void GSTextureCache::Source::Update(const GSVector4i& rect, int level) { - Surface::UpdateAge(); + m_age = 0; + if (m_from_target) + m_from_target->m_age = 0; if (m_target || m_from_hash_cache || (m_complete_layers & (1u << level))) return; @@ -4003,13 +3942,13 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int level) GSVector4i r(rect); const GSVector4i region_rect(m_region.GetRect(tw, th)); - // Offset the pages we use by the clamp region. + // Clamp to region, the input rect should already be moved to it. if (m_region.HasEither()) - r = (r + m_region.GetOffset(tw, th)).rintersect(region_rect); + r = r.rintersect(region_rect); r = r.ralign(bs); - if (region_rect.eq(m_region.HasEither() ? r.rintersect(region_rect) : r)) + if (region_rect.eq(r.rintersect(region_rect))) m_complete_layers |= (1u << level); const GSOffset& off = g_gs_renderer->m_context->offset.tex; @@ -4240,15 +4179,29 @@ GSTextureCache::Target::~Target() pxAssert(!m_shared_texture); if (m_texture) { - GSRendererHW::GetInstance()->GetTextureCache()->m_target_memory_usage -= m_texture->GetMemUsage(); + g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); g_gs_device->Recycle(m_texture); } + +#ifdef PCSX2_DEVBUILD + // Make sure all sources referencing this target have been removed. 
+ for (GSTextureCache::Source* src : g_texture_cache->m_src.m_surfaces) + { + if (src->m_from_target == this) + { + pxFail(fmt::format("Source at TBP {:x} for target at TBP {:x} on target invalidation", + static_cast(src->m_TEX0.TBP0), static_cast(m_TEX0.TBP0)) + .c_str()); + break; + } + } +#endif } void GSTextureCache::Target::Update(bool reset_age) { if (reset_age) - Surface::UpdateAge(); + m_age = 0; // FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :) // GH: it must be doable @@ -4476,21 +4429,36 @@ bool GSTextureCache::Target::ResizeTexture(int new_unscaled_width, int new_unsca return false; } - const GSVector4i rc(0, 0, std::min(width, new_width), std::min(height, new_height)); - if (tex->IsDepthStencil()) + // Only need to copy if it's been written to. + if (m_texture->GetState() == GSTexture::State::Dirty) { - // Can't do partial copies in DirectX for depth textures, and it's probably not ideal in other - // APIs either. So use a fullscreen quad setting depth instead. - g_gs_device->StretchRect(m_texture, tex, GSVector4(rc), ShaderConvert::DEPTH_COPY, false); + const GSVector4i rc(0, 0, std::min(width, new_width), std::min(height, new_height)); + if (tex->IsDepthStencil()) + { + // Can't do partial copies in DirectX for depth textures, and it's probably not ideal in other + // APIs either. So use a fullscreen quad setting depth instead. + g_gs_device->StretchRect(m_texture, tex, GSVector4(rc), ShaderConvert::DEPTH_COPY, false); + } + else + { + // Fast memcpy()-like path for color targets. + g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + } + } + else if (m_texture->GetState() == GSTexture::State::Cleared) + { + // Otherwise just pass the clear through. + if (tex->GetType() != GSTexture::Type::DepthStencil) + g_gs_device->ClearRenderTarget(tex, tex->GetClearColor()); + else + g_gs_device->ClearDepth(tex/*, tex->GetClearDepth()*/); } else { - // Fast memcpy()-like path for color targets. 
- g_gs_device->CopyRect(m_texture, tex, rc, 0, 0); + g_gs_device->InvalidateRenderTarget(tex); } - GSTextureCache* tc = GSRendererHW::GetInstance()->GetTextureCache(); - tc->m_target_memory_usage = (tc->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); + g_texture_cache->m_target_memory_usage = (g_texture_cache->m_target_memory_usage - m_texture->GetMemUsage()) + tex->GetMemUsage(); if (recycle_old) g_gs_device->Recycle(m_texture); @@ -4543,9 +4511,8 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s) { m_surfaces.erase(s); - GL_CACHE("TC: Remove Src Texture: %d (0x%x)", - s->m_texture ? s->m_texture->GetID() : 0, - s->m_TEX0.TBP0); + GL_CACHE("TC: Remove Src Texture: 0x%x TBW %u PSM %s", + s->m_TEX0.TBP0, s->m_TEX0.TBW, psm_str(s->m_TEX0.PSM)); s->m_pages.loopPages([this, s](u32 page) { @@ -4765,7 +4732,7 @@ void GSTextureCache::InvalidateTemporarySource() if (!m_temporary_source) return; - m_src.RemoveAt(m_temporary_source); + delete m_temporary_source; m_temporary_source = nullptr; } @@ -4817,7 +4784,7 @@ GSTextureCache::Palette::~Palette() { if (m_tex_palette) { - GSRendererHW::GetInstance()->GetTextureCache()->m_source_memory_usage -= m_tex_palette->GetMemUsage(); + g_texture_cache->m_source_memory_usage -= m_tex_palette->GetMemUsage(); g_gs_device->Recycle(m_tex_palette); } @@ -4845,7 +4812,7 @@ void GSTextureCache::Palette::InitializeTexture() // and therefore will read texel 15/255 * texture size). 
m_tex_palette = g_gs_device->CreateTexture(m_pal, 1, 1, GSTexture::Format::Color); m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); - GSRendererHW::GetInstance()->GetTextureCache()->m_source_memory_usage += m_tex_palette->GetMemUsage(); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } } @@ -4998,6 +4965,11 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur return true; } +bool GSTextureCache::SourceRegion::IsFixedTEX0(GIFRegTEX0 TEX0) const +{ + return IsFixedTEX0(1 << TEX0.TW, 1 << TEX0.TH); +} + bool GSTextureCache::SourceRegion::IsFixedTEX0(int tw, int th) const { return IsFixedTEX0W(tw) || IsFixedTEX0H(th); @@ -5013,6 +4985,11 @@ bool GSTextureCache::SourceRegion::IsFixedTEX0H(int th) const return (GetMaxY() > static_cast(th)); } +GSVector2i GSTextureCache::SourceRegion::GetSize(int tw, int th) const +{ + return GSVector2i(HasX() ? GetWidth() : tw, HasY() ? GetHeight() : th); +} + GSVector4i GSTextureCache::SourceRegion::GetRect(int tw, int th) const { return GSVector4i(HasX() ? GetMinX() : 0, HasY() ? GetMinY() : 0, HasX() ? GetMaxX() : tw, HasY() ? GetMaxY() : th); @@ -5050,6 +5027,55 @@ void GSTextureCache::SourceRegion::AdjustTEX0(GIFRegTEX0* TEX0) const TEX0->TBP0 += offset.bn(GetMinX(), GetMinY()); } +GSTextureCache::SourceRegion GSTextureCache::SourceRegion::Create(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP) +{ + SourceRegion region = {}; + + if (CLAMP.WMS == CLAMP_REGION_CLAMP && CLAMP.MAXU >= CLAMP.MINU) + { + // Another Lupin case here, it uses region clamp with UV (not ST), puts a clamp region further + // into the texture, but a smaller TW/TH. Catch this by looking for a clamp range above TW. 
+ const u32 rw = CLAMP.MAXU - CLAMP.MINU + 1; + if (rw < (1u << TEX0.TW) || CLAMP.MAXU >= (1u << TEX0.TW)) + { + region.SetX(CLAMP.MINU, CLAMP.MAXU + 1); + GL_CACHE("TC: Region clamp optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); + } + } + else if (CLAMP.WMS == CLAMP_REGION_REPEAT && CLAMP.MINU != 0) + { + // Lupin the 3rd is really evil, it sets TW/TH to the texture size, but then uses region repeat + // to offset the actual texture data to elsewhere. So, we'll just force any cases like this down + // the region texture path. + const u32 rw = ((CLAMP.MINU | CLAMP.MAXU) - CLAMP.MAXU) + 1; + if (rw < (1u << TEX0.TW) || (CLAMP.MAXU != 0 && (rw <= (1u << TEX0.TW)))) + { + region.SetX(CLAMP.MAXU, (CLAMP.MINU | CLAMP.MAXU) + 1); + GL_CACHE("TC: Region repeat optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); + } + } + if (CLAMP.WMT == CLAMP_REGION_CLAMP && CLAMP.MAXV >= CLAMP.MINV) + { + const u32 rh = CLAMP.MAXV - CLAMP.MINV + 1; + if (rh < (1u << TEX0.TH) || CLAMP.MAXV >= (1u << TEX0.TH)) + { + region.SetY(CLAMP.MINV, CLAMP.MAXV + 1); + GL_CACHE("TC: Region clamp optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight()); + } + } + else if (CLAMP.WMT == CLAMP_REGION_REPEAT && CLAMP.MINV != 0) + { + const u32 rh = ((CLAMP.MINV | CLAMP.MAXV) - CLAMP.MAXV) + 1; + if (rh < (1u << TEX0.TH) || (CLAMP.MAXV != 0 && (rh <= (1u << TEX0.TH)))) + { + region.SetY(CLAMP.MAXV, (CLAMP.MINV | CLAMP.MAXV) + 1); + GL_CACHE("TC: Region repeat optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight()); + } + } + + return region; +} + using BlockHashState = XXH3_state_t; __fi static void BlockHashReset(BlockHashState& st) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 876b63978e..526b57e18f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -64,10 +64,14 @@ public: u32 GetHeight() const { return (GetMaxY() - GetMinY()); } /// Returns true if the
area of the region exceeds the TW/TH size (i.e. "fixed tex0"). + bool IsFixedTEX0(GIFRegTEX0 TEX0) const; bool IsFixedTEX0(int tw, int th) const; bool IsFixedTEX0W(int tw) const; bool IsFixedTEX0H(int th) const; + /// Returns the size that the region occupies. + GSVector2i GetSize(int tw, int th) const; + /// Returns the rectangle relative to the texture base pointer that the region occupies. GSVector4i GetRect(int tw, int th) const; @@ -80,6 +84,9 @@ public: /// Adjusts the texture base pointer and block width relative to the region. void AdjustTEX0(GIFRegTEX0* TEX0) const; + + /// Creates a new source region based on the CLAMP register. + static SourceRegion Create(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP); }; using HashType = u64; @@ -146,8 +153,6 @@ public: /// Can be used for overlap tests. u32 UnwrappedEndBlock() const { return (m_end_block + (Wraps() ? MAX_BLOCKS : 0)); } - void UpdateAge(); - bool Inside(u32 bp, u32 bw, u32 psm, const GSVector4i& rect); bool Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i& rect); }; @@ -196,58 +201,6 @@ public: bool operator()(const PaletteKey& lhs, const PaletteKey& rhs) const; }; - class Source : public Surface - { - struct - { - GSVector4i* rect; - u32 count; - } m_write = {}; - - void PreloadLevel(int level); - - void Write(const GSVector4i& r, int layer); - void Flush(u32 count, int layer); - - public: - HashCacheEntry* m_from_hash_cache = nullptr; - std::shared_ptr m_palette_obj; - std::unique_ptr m_valid;// each u32 bits map to the 32 blocks of that page - GSTexture* m_palette = nullptr; - GSVector4i m_valid_rect = {}; - GSVector2i m_lod = {}; - SourceRegion m_region = {}; - u8 m_valid_hashes = 0; - u8 m_complete_layers = 0; - bool m_target = false; - bool m_repeating = false; - std::vector* m_p2t = nullptr; - // Keep a trace of the target origin. There is no guarantee that pointer will - // still be valid on future. 
However it ought to be good when the source is created - // so it can be used to access un-converted data for the current draw call. - GSTexture** m_from_target = nullptr; - GIFRegTEX0 m_from_target_TEX0 = {}; // TEX0 of the target texture, if any, else equal to texture TEX0 - GIFRegTEX0 m_layer_TEX0[7] = {}; // Detect already loaded value - HashType m_layer_hash[7] = {}; - // Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase - // Deliberately not initialized to save cycles. - std::array m_erase_it; - GSOffset::PageLooper m_pages; - - public: - Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); - virtual ~Source(); - - __fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); } - - void SetPages(); - - void Update(const GSVector4i& rect, int layer = 0); - void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0); - - bool ClutMatch(const PaletteKey& palette_key); - }; - class Target : public Surface { public: @@ -282,6 +235,63 @@ public: bool ResizeTexture(int new_unscaled_width, int new_unscaled_height, bool recycle_old = true); }; + class Source : public Surface + { + struct + { + GSVector4i* rect; + u32 count; + } m_write = {}; + + void PreloadLevel(int level); + + void Write(const GSVector4i& r, int layer); + void Flush(u32 count, int layer); + + public: + HashCacheEntry* m_from_hash_cache = nullptr; + std::shared_ptr m_palette_obj; + std::unique_ptr m_valid;// each u32 bits map to the 32 blocks of that page + GSTexture* m_palette = nullptr; + GSVector4i m_valid_rect = {}; + GSVector2i m_lod = {}; + SourceRegion m_region = {}; + u8 m_valid_hashes = 0; + u8 m_complete_layers = 0; + bool m_target = false; + bool m_repeating = false; + std::vector* m_p2t = nullptr; + // Keep a trace of the target origin. There is no guarantee that pointer will + // still be valid on future. 
However it ought to be good when the source is created + // so it can be used to access un-converted data for the current draw call. + Target* m_from_target = nullptr; + GIFRegTEX0 m_from_target_TEX0 = {}; // TEX0 of the target texture, if any, else equal to texture TEX0 + GIFRegTEX0 m_layer_TEX0[7] = {}; // Detect already loaded value + HashType m_layer_hash[7] = {}; + // Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase + // Deliberately not initialized to save cycles. + std::array m_erase_it; + GSOffset::PageLooper m_pages; + + public: + Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + virtual ~Source(); + + __fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); } + __fi bool IsFromTarget() const { return m_target; } + + __fi const SourceRegion& GetRegion() const { return m_region; } + __fi GSVector2i GetRegionSize() const { return m_region.GetSize(m_unscaled_size.x, m_unscaled_size.y); } + __fi GSVector4i GetRegionRect() const { return m_region.GetRect(m_unscaled_size.x, m_unscaled_size.y); } + + void SetPages(); + + void Update(const GSVector4i& rect, int layer = 0); + void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0); + + bool ClutMatch(const PaletteKey& palette_key); + }; + class PaletteMap { private: @@ -328,7 +338,7 @@ public: struct { - u32 fbp : 14; + u32 bp : 14; u32 fbw : 6; u32 psm : 6; u32 pad : 6; @@ -434,20 +444,27 @@ public: Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette = false); Target* FindTargetOverlap(u32 bp, u32 end_block, int type, int psm); - Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask = 0, const bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool is_clear = false); - Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, float scale); + Target* 
LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used, u32 fbmask = 0, const bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool is_clear = false); + Target* LookupDisplayTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale); /// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly. Target* GetExactTarget(u32 BP, u32 BW, u32 PSM) const; Target* GetTargetWithSharedBits(u32 BP, u32 PSM) const; - u32 GetTargetHeight(u32 fbp, u32 fbw, u32 psm, u32 min_height); + u32 GetTargetHeight(u32 bp, u32 fbw, u32 psm, u32 min_height); bool Has32BitTarget(u32 bp); void InvalidateVideoMemType(int type, u32 bp); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool eewrite = false, bool target = true); void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r); + + /// Removes any targets overlapping the specified BP and rectangle. + void InvalidateVideoMemTargets(int type, u32 bp, u32 bw, u32 psm, const GSVector4i& r); + + /// Removes any sources which point to the specified target. + void InvalidateSourcesFromTarget(const Target* t); + bool Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u32 DBW, u32 DPSM, int dx, int dy, int w, int h); bool ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx, int dy, int w, int h); @@ -470,3 +487,5 @@ public: /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. 
void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex); }; + +extern std::unique_ptr<GSTextureCache> g_texture_cache; diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp index 06c33d75a9..f847cdc552 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp @@ -130,9 +130,6 @@ namespace GSTextureReplacements static std::string s_current_serial; - /// Backreference to the texture cache so we can inject replacements. - static GSTextureCache* s_tc; - /// Textures that have been dumped, to save stat() calls. static std::unordered_set s_dumped_textures; @@ -302,9 +299,8 @@ std::string GSTextureReplacements::GetDumpFilename(const TextureName& name, u32 return ret; } -void GSTextureReplacements::Initialize(GSTextureCache* tc) +void GSTextureReplacements::Initialize() { - s_tc = tc; s_current_serial = VMManager::GetGameSerial(); if (GSConfig.DumpReplaceableTextures || GSConfig.LoadTextureReplacements) @@ -315,9 +311,6 @@ void GSTextureReplacements::Initialize(GSTextureCache* tc) void GSTextureReplacements::GameChanged() { - if (!s_tc) - return; - std::string new_serial(VMManager::GetGameSerial()); if (s_current_serial == new_serial) return; @@ -420,7 +413,6 @@ void GSTextureReplacements::Shutdown() std::string().swap(s_current_serial); ClearReplacementTextures(); ClearDumpedTextureList(); - s_tc = nullptr; } u32 GSTextureReplacements::CalcMipmapLevelsForReplacement(u32 width, u32 height) @@ -637,7 +629,7 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures() // upload and inject into TC GSTexture* tex = CreateReplacementTexture(it->second, mipmap); if (tex) - s_tc->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex); + g_texture_cache->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex); } s_async_loaded_textures.clear(); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h index
9b336afdc5..6761870e37 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h @@ -38,7 +38,7 @@ namespace GSTextureReplacements std::vector mips; }; - void Initialize(GSTextureCache* tc); + void Initialize(); void GameChanged(); void ReloadReplacementMap(); void UpdateConfig(Pcsx2Config::GSOptions& old_config); diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index f821f89162..f223cc94f6 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1027,6 +1027,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_DATE {}\n", sel.date) + fmt::format("#define PS_TCOFFSETHACK {}\n", sel.tcoffsethack) + fmt::format("#define PS_POINT_SAMPLER {}\n", sel.point_sampler) + + fmt::format("#define PS_REGION_RECT {}\n", sel.region_rect) + fmt::format("#define PS_BLEND_A {}\n", sel.blend_a) + fmt::format("#define PS_BLEND_B {}\n", sel.blend_b) + fmt::format("#define PS_BLEND_C {}\n", sel.blend_c) @@ -1164,7 +1165,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture // Init // ************************************ - GL_PUSH("StretchRect from %d to %d", sTex->GetID(), dTex->GetID()); + GL_PUSH("StretchRect from %d to %d", static_cast<GSTextureOGL*>(sTex)->GetID(), static_cast<GSTextureOGL*>(dTex)->GetID()); if (draw_in_depth) OMSetRenderTargets(NULL, dTex); else diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h index 7d6f22f7f1..1a8925a8a5 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.h @@ -66,7 +66,7 @@ public: return (m_int_type == GL_UNSIGNED_BYTE || m_int_type == GL_UNSIGNED_SHORT || m_int_type == GL_UNSIGNED_INT); } - u32 GetID() final { return m_texture_id; } + u32 GetID() { return m_texture_id; } bool HasBeenCleaned() { return m_clean; } void WasAttached() { m_clean = false; } void
WasCleaned() { m_clean = true; } diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index 36c780ed83..48abec5e3c 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -1065,7 +1065,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) GIFRegTEX0 TEX0 = m_context->GetSizeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t), m_vt.IsLinear(), mipmap); - GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf).coverage; + GSVector4i r = GetTextureMinMax(TEX0, context->CLAMP, gd.sel.ltf, true).coverage; GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA); @@ -1171,7 +1171,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) return false; } - GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf).coverage; + GSVector4i r = GetTextureMinMax(MIP_TEX0, MIP_CLAMP, gd.sel.ltf, true).coverage; data->SetSource(t, r, i); } diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 76808c5563..aa192e2773 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -2085,6 +2085,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_DATE", sel.date); AddMacro(ss, "PS_TCOFFSETHACK", sel.tcoffsethack); AddMacro(ss, "PS_POINT_SAMPLER", sel.point_sampler); + AddMacro(ss, "PS_REGION_RECT", sel.region_rect); AddMacro(ss, "PS_BLEND_A", sel.blend_a); AddMacro(ss, "PS_BLEND_B", sel.blend_b); AddMacro(ss, "PS_BLEND_C", sel.blend_c); @@ -3058,7 +3059,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds); GSTextureVK* draw_rt_clone = nullptr; GSTextureVK* hdr_rt = nullptr; - GSTextureVK* copy_ds = nullptr; // Switch to hdr target for colclip rendering if (pipe.ps.hdr) @@ -3109,26 +3109,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) } } - if (config.tex) - { - if (config.tex ==
config.ds) - { - // requires a copy of the depth buffer. this is mainly for ico. - copy_ds = static_cast<GSTextureVK*>(CreateDepthStencil(rtsize.x, rtsize.y, GSTexture::Format::DepthStencil, false)); - if (copy_ds) - { - EndRenderPass(); - - GL_PUSH("Copy depth to temp texture for shuffle {%d,%d %dx%d}", - config.drawarea.left, config.drawarea.top, - config.drawarea.width(), config.drawarea.height()); - - pxAssert(copy_ds->GetState() == GSTexture::State::Invalidated); - CopyRect(config.ds, copy_ds, GSVector4i(config.ds->GetSize()).zwxy(), 0, 0); - PSSetShaderResource(0, copy_ds, true); - } - } - } // clear texture binding when it's bound to RT or DS if (!config.tex && ((config.rt && static_cast<GSTextureVK*>(config.rt)->GetView() == m_tfx_textures[0]) || (config.ds && static_cast<GSTextureVK*>(config.ds)->GetView() == m_tfx_textures[0]))) @@ -3258,9 +3238,6 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) } } - if (copy_ds) - Recycle(copy_ds); - if (draw_rt_clone) Recycle(draw_rt_clone);