diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 518185aade..ab15f78c95 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -21,6 +21,8 @@ #define PS_FST 0 #define PS_WMS 0 #define PS_WMT 0 +#define PS_ADJS 0 +#define PS_ADJT 0 #define PS_AEM_FMT FMT_32 #define PS_AEM 0 #define PS_TFX 0 @@ -42,7 +44,6 @@ #define PS_CHANNEL_FETCH 0 #define PS_TALES_OF_ABYSS_HLE 0 #define PS_URBAN_CHAOS_HLE 0 -#define PS_INVALID_TEX0 0 #define PS_SCALE_FACTOR 1.0 #define PS_HDR 0 #define PS_COLCLIP 0 @@ -158,10 +159,10 @@ cbuffer cb1 float2 TA; float MaxDepthPS; float Af; - uint4 MskFix; uint4 FbMask; float4 HalfTexel; float4 MinMax; + float4 STRange; int4 ChannelShuffle; float2 TC_OffsetHack; float2 STScale; @@ -183,7 +184,20 @@ float4 sample_c(float2 uv, float uv_w) // As of 2018 this issue is still present. uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw; } +#if !PS_ADJS && !PS_ADJT uv *= STScale; +#else + #if PS_ADJS + uv.x = (uv.x - STRange.x) * STRange.z; + #else + uv.x = uv.x * STScale.x; + #endif + #if PS_ADJT + uv.y = (uv.y - STRange.y) * STRange.w; + #else + uv.y = uv.y * STScale.y; + #endif +#endif #if PS_AUTOMATIC_LOD == 1 return Texture.Sample(TextureSampler, uv); @@ -218,12 +232,7 @@ float4 sample_p_norm(float u) float4 clamp_wrap_uv(float4 uv) { - float4 tex_size; - - if (PS_INVALID_TEX0 == 1) - tex_size = WH.zwzw; - else - tex_size = WH.xyxy; + float4 tex_size = WH.xyxy; if(PS_WMS == PS_WMT) { @@ -238,7 +247,7 @@ float4 clamp_wrap_uv(float4 uv) // textures. Fixes Xenosaga's hair issue. 
uv = frac(uv); #endif - uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; + uv = (float4)(((uint4)(uv * tex_size) & asuint(MinMax.xyxy)) | asuint(MinMax.zwzw)) / tex_size; } } else @@ -252,7 +261,7 @@ float4 clamp_wrap_uv(float4 uv) #if PS_FST == 0 uv.xz = frac(uv.xz); #endif - uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & asuint(MinMax.xx)) | asuint(MinMax.zz)) / tex_size.xx; } if(PS_WMT == 2) { @@ -263,7 +272,7 @@ float4 clamp_wrap_uv(float4 uv) #if PS_FST == 0 uv.yw = frac(uv.yw); #endif - uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & asuint(MinMax.yy)) | asuint(MinMax.ww)) / tex_size.yy; } } @@ -353,7 +362,7 @@ float4 fetch_c(int2 uv) int2 clamp_wrap_uv_depth(int2 uv) { - int4 mask = (int4)MskFix << 4; + int4 mask = asint(MinMax) << 4; if (PS_WMS == PS_WMT) { if (PS_WMS == 2) @@ -676,11 +685,7 @@ float4 fog(float4 c, float f) float4 ps_color(PS_INPUT input) { -#if PS_FST == 0 && PS_INVALID_TEX0 == 1 - // Re-normalize coordinate from invalid GS to corrected texture size - float2 st = (input.t.xy * WH.xy) / (input.t.w * WH.zw); - float2 st_int = (input.ti.zw * WH.xy) / (input.t.w * WH.zw); -#elif PS_FST == 0 +#if PS_FST == 0 float2 st = input.t.xy / input.t.w; float2 st_int = input.ti.zw / input.t.w; #else diff --git a/bin/resources/shaders/opengl/common_header.glsl b/bin/resources/shaders/opengl/common_header.glsl index ac08be64d6..9a7ce8589c 100644 --- a/bin/resources/shaders/opengl/common_header.glsl +++ b/bin/resources/shaders/opengl/common_header.glsl @@ -75,13 +75,12 @@ layout(std140, binding = 0) uniform cb21 float MaxDepthPS; float Af; - uvec4 MskFix; - uvec4 FbMask; vec4 HalfTexel; vec4 MinMax; + vec4 STRange; ivec4 ChannelShuffle; @@ -92,11 +91,6 @@ layout(std140, binding = 0) uniform cb21 }; #endif -//layout(std140, binding = 22) uniform 
cb22 -//{ -// vec4 rt_size; -//}; - ////////////////////////////////////////////////////////////////////// // Default Sampler ////////////////////////////////////////////////////////////////////// diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 669cd34448..2f0a82aef2 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -109,7 +109,20 @@ vec4 sample_c(vec2 uv) // As of 2018 this issue is still present. uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw; #endif - uv *= STScale; +#if !PS_ADJS && !PS_ADJT + uv *= STScale; +#else + #if PS_ADJS + uv.x = (uv.x - STRange.x) * STRange.z; + #else + uv.x = uv.x * STScale.x; + #endif + #if PS_ADJT + uv.y = (uv.y - STRange.y) * STRange.w; + #else + uv.y = uv.y * STScale.y; + #endif +#endif #if PS_AUTOMATIC_LOD == 1 return texture(TextureSampler, uv); @@ -146,11 +159,7 @@ vec4 sample_p_norm(float u) vec4 clamp_wrap_uv(vec4 uv) { vec4 uv_out = uv; -#if PS_INVALID_TEX0 == 1 - vec4 tex_size = WH.zwzw; -#else vec4 tex_size = WH.xyxy; -#endif #if PS_WMS == PS_WMT @@ -162,7 +171,7 @@ vec4 clamp_wrap_uv(vec4 uv) // textures. Fixes Xenosaga's hair issue. 
uv = fract(uv); #endif - uv_out = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; + uv_out = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size; #endif #else // PS_WMS != PS_WMT @@ -174,7 +183,7 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_FST == 0 uv.xz = fract(uv.xz); #endif - uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx; #endif @@ -185,7 +194,7 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_FST == 0 uv.yw = fract(uv.yw); #endif - uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy; #endif #endif @@ -288,7 +297,7 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv) // Keep the full precision // It allow to multiply the ScalingFactor before the 1/16 coeff - ivec4 mask = ivec4(MskFix) << 4; + ivec4 mask = floatBitsToInt(MinMax) << 4; #if PS_WMS == PS_WMT @@ -591,11 +600,7 @@ void fog(inout vec4 C, float f) vec4 ps_color() { //FIXME: maybe we can set gl_Position.w = q in VS -#if (PS_FST == 0) && (PS_INVALID_TEX0 == 1) - // Re-normalize coordinate from invalid GS to corrected texture size - vec2 st = (PSin.t_float.xy * WH.xy) / (vec2(PSin.t_float.w) * WH.zw); - vec2 st_int = (PSin.t_int.zw * WH.xy) / (vec2(PSin.t_float.w) * WH.zw); -#elif (PS_FST == 0) +#if (PS_FST == 0) vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w); vec2 st_int = PSin.t_int.zw / vec2(PSin.t_float.w); #else diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 1f588a804e..bdb336f0eb 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -312,6 +312,8 @@ void main() #define PS_FST 0 #define PS_WMS 0 #define PS_WMT 0 +#define PS_ADJS 0 +#define PS_ADJT 0 #define 
PS_FMT FMT_32 #define PS_AEM 0 #define PS_TFX 0 @@ -332,7 +334,6 @@ void main() #define PS_CHANNEL_FETCH 0 #define PS_TALES_OF_ABYSS_HLE 0 #define PS_URBAN_CHAOS_HLE 0 -#define PS_INVALID_TEX0 0 #define PS_SCALE_FACTOR 1.0 #define PS_HDR 0 #define PS_COLCLIP 0 @@ -361,10 +362,10 @@ layout(std140, set = 0, binding = 1) uniform cb1 vec2 TA; float MaxDepthPS; float Af; - uvec4 MskFix; uvec4 FbMask; vec4 HalfTexel; vec4 MinMax; + vec4 STRange; ivec4 ChannelShuffle; vec2 TC_OffsetHack; vec2 STScale; @@ -420,7 +421,20 @@ vec4 sample_c(vec2 uv) // As of 2018 this issue is still present. uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw; #endif +#if !PS_ADJS && !PS_ADJT uv *= STScale; +#else + #if PS_ADJS + uv.x = (uv.x - STRange.x) * STRange.z; + #else + uv.x = uv.x * STScale.x; + #endif + #if PS_ADJT + uv.y = (uv.y - STRange.y) * STRange.w; + #else + uv.y = uv.y * STScale.y; + #endif +#endif #if PS_AUTOMATIC_LOD == 1 return texture(Texture, uv); @@ -455,13 +469,7 @@ vec4 sample_p_norm(float u) vec4 clamp_wrap_uv(vec4 uv) { - vec4 tex_size; - - #if PS_INVALID_TEX0 - tex_size = WH.zwzw; - #else - tex_size = WH.xyxy; - #endif + vec4 tex_size = WH.xyxy; #if PS_WMS == PS_WMT { @@ -476,7 +484,7 @@ vec4 clamp_wrap_uv(vec4 uv) // textures. Fixes Xenosaga's hair issue. 
uv = fract(uv); #endif - uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; + uv = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size; } #endif } @@ -491,7 +499,7 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_FST == 0 uv.xz = fract(uv.xz); #endif - uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx; } #endif #if PS_WMT == 2 @@ -503,7 +511,7 @@ vec4 clamp_wrap_uv(vec4 uv) #if PS_FST == 0 uv.yw = fract(uv.yw); #endif - uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy; } #endif } @@ -590,7 +598,7 @@ vec4 fetch_c(ivec2 uv) ivec2 clamp_wrap_uv_depth(ivec2 uv) { - ivec4 mask = ivec4(MskFix << 4); + ivec4 mask = floatBitsToInt(MinMax) << 4; #if (PS_WMS == PS_WMT) { #if (PS_WMS == 2) @@ -907,11 +915,7 @@ vec4 fog(vec4 c, float f) vec4 ps_color() { -#if PS_FST == 0 && PS_INVALID_TEX0 == 1 - // Re-normalize coordinate from invalid GS to corrected texture size - vec2 st = (vsIn.t.xy * WH.xy) / (vsIn.t.w * WH.zw); - vec2 st_int = (vsIn.ti.zw * WH.xy) / (vsIn.t.w * WH.zw); -#elif PS_FST == 0 +#if PS_FST == 0 vec2 st = vsIn.t.xy / vsIn.t.w; vec2 st_int = vsIn.ti.zw / vsIn.t.w; #else diff --git a/pcsx2/GS/GSDrawingContext.cpp b/pcsx2/GS/GSDrawingContext.cpp index 522185836a..011a664781 100644 --- a/pcsx2/GS/GSDrawingContext.cpp +++ b/pcsx2/GS/GSDrawingContext.cpp @@ -130,7 +130,7 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, res.TW = tw > 10 ? 0 : tw; res.TH = th > 10 ? 
0 : th; - if (GSConfig.Renderer == GSRendererType::SW && (TEX0.TW != res.TW || TEX0.TH != res.TH)) + if (TEX0.TW != res.TW || TEX0.TH != res.TH) { GL_DBG("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)", (int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM, @@ -142,50 +142,3 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear, return res; } - -void GSDrawingContext::ComputeFixedTEX0(const GSVector4& st) -{ - // It is quite complex to handle rescaling so this function is less stricter than GetSizeFixedTEX0, - // therefore we remove the reduce optimization and we don't handle bilinear filtering which might create wrong interpolation at the border. - int tw = TEX0.TW; - int th = TEX0.TH; - - int wms = (int)CLAMP.WMS; - int wmt = (int)CLAMP.WMT; - - int minu = (int)CLAMP.MINU; - int minv = (int)CLAMP.MINV; - int maxu = (int)CLAMP.MAXU; - int maxv = (int)CLAMP.MAXV; - - if (wms != CLAMP_REGION_CLAMP) - tw = tw > 10 ? 0 : tw; - - if (wmt != CLAMP_REGION_CLAMP) - th = th > 10 ? 
0 : th; - - GSVector4i uv = GSVector4i(st.floor().xyzw(st.ceil())); - - uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu); - uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv); - - if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT) - tw = extend(uv.x, tw); - - if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT) - th = extend(uv.y, th); - - tw = std::clamp(tw, 0, 10); - th = std::clamp(th, 0, 10); - - if ((tw != (int)TEX0.TW) || (th != (int)TEX0.TH)) - { - m_fixed_tex0 = true; - TEX0.TW = tw; - TEX0.TH = th; - - GL_DBG("FixedTEX0 TW %d=>%d, TH %d=>%d wm %d,%d", - (int)stack.TEX0.TW, (int)TEX0.TW, (int)stack.TEX0.TH, (int)TEX0.TH, - (int)CLAMP.WMS, (int)CLAMP.WMT); - } -} diff --git a/pcsx2/GS/GSDrawingContext.h b/pcsx2/GS/GSDrawingContext.h index dedba2910c..2122880383 100644 --- a/pcsx2/GS/GSDrawingContext.h +++ b/pcsx2/GS/GSDrawingContext.h @@ -69,12 +69,8 @@ public: GIFRegZBUF ZBUF; } stack; - bool m_fixed_tex0; - GSDrawingContext() { - m_fixed_tex0 = false; - memset(&offset, 0, sizeof(offset)); Reset(); @@ -140,8 +136,6 @@ public: } GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const; - void ComputeFixedTEX0(const GSVector4& st); - bool HasFixedTEX0() const { return m_fixed_tex0; } // Save & Restore before/after draw allow to correct/optimize current register for current draw // Note: we could avoid the restore part if all renderer code is updated to use a local copy instead @@ -159,9 +153,6 @@ public: stack.FBA = FBA; stack.FRAME = FRAME; stack.ZBUF = ZBUF; - - // This function is called before the draw so take opportunity to reset m_fixed_tex0 - m_fixed_tex0 = false; } void RestoreReg() diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h index 1ab1aa12cd..4076141964 100644 --- a/pcsx2/GS/GSRegs.h +++ b/pcsx2/GS/GSRegs.h @@ -823,6 +823,7 @@ union REG_END2 __forceinline bool IsRepeating() const { + // This is actually "does the texture span more than one page". 
if (TBW < 2) { if (PSM == PSM_PSMT8) diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 5077d94f3a..0fda71972e 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -1699,7 +1699,6 @@ inline void GSState::CopyEnv(GSDrawingEnvironment* dest, GSDrawingEnvironment* s { memcpy(dest, src, 88); memcpy(&dest->CTXT[ctx], &src->CTXT[ctx], 96); - dest->CTXT[ctx].m_fixed_tex0 = src->CTXT[ctx].m_fixed_tex0; } void GSState::Flush(GSFlushReason reason) @@ -3583,8 +3582,11 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c const int minu = (int)CLAMP.MINU; const int minv = (int)CLAMP.MINV; - const int maxu = (int)CLAMP.MAXU; - const int maxv = (int)CLAMP.MAXV; + + // For the FixedTEX0 case, in hardware, we handle this in the texture cache. Don't OR the bits in here, otherwise + // we'll end up with an invalid rectangle, we want the passed-in rectangle to be relative to the normalized size. + const int maxu = (wms != CLAMP_REGION_REPEAT || (int)CLAMP.MAXU < w) ? (int)CLAMP.MAXU : 0; + const int maxv = (wmt != CLAMP_REGION_REPEAT || (int)CLAMP.MAXV < h) ? 
(int)CLAMP.MAXV : 0; GSVector4i vr = tr; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 74357290c4..5b536207e1 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -309,6 +309,8 @@ struct alignas(16) GSHWDrawConfig u32 tcc : 1; u32 wms : 2; u32 wmt : 2; + u32 adjs : 1; + u32 adjt : 1; u32 ltf : 1; // Shuffle and fbmask effect u32 shuffle : 1; @@ -352,7 +354,6 @@ struct alignas(16) GSHWDrawConfig u32 automatic_lod : 1; u32 manual_lod : 1; u32 point_sampler : 1; - u32 invalid_tex0 : 1; // Lupin the 3rd // Scan mask u32 scanmsk : 2; @@ -554,11 +555,11 @@ struct alignas(16) GSHWDrawConfig GSVector4 FogColor_AREF; GSVector4 WH; GSVector4 TA_MaxDepth_Af; - GSVector4i MskFix; GSVector4i FbMask; GSVector4 HalfTexel; GSVector4 MinMax; + GSVector4 STRange; GSVector4i ChannelShuffle; GSVector2 TCOffsetHack; GSVector2 STScale; diff --git a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp index 60c3621568..fafe075d88 100644 --- a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp @@ -142,6 +142,8 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_FST", sel.fst); sm.AddMacro("PS_WMS", sel.wms); sm.AddMacro("PS_WMT", sel.wmt); + sm.AddMacro("PS_ADJS", sel.adjs); + sm.AddMacro("PS_ADJT", sel.adjt); sm.AddMacro("PS_AEM_FMT", sel.aem_fmt); sm.AddMacro("PS_AEM", sel.aem); sm.AddMacro("PS_TFX", sel.tfx); @@ -164,7 +166,6 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_DFMT", sel.dfmt); sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt); sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); - sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0); sm.AddMacro("PS_HDR", sel.hdr); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp 
index 0b24c4c735..e67fd9bde5 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -1483,6 +1483,8 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_FST", sel.fst); sm.AddMacro("PS_WMS", sel.wms); sm.AddMacro("PS_WMT", sel.wmt); + sm.AddMacro("PS_ADJS", sel.adjs); + sm.AddMacro("PS_ADJT", sel.adjt); sm.AddMacro("PS_AEM_FMT", sel.aem_fmt); sm.AddMacro("PS_AEM", sel.aem); sm.AddMacro("PS_TFX", sel.tfx); @@ -1505,7 +1507,6 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_DFMT", sel.dfmt); sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt); sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); - sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0); sm.AddMacro("PS_HDR", sel.hdr); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index e5ae60f811..bb94c21ab9 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1279,10 +1279,6 @@ void GSRendererHW::Draw() return; } - // Fix TEX0 size - if (PRIM->TME && !IsMipMapActive()) - m_context->ComputeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t)); - // skip alpha test if possible // Note: do it first so we know if frame/depth writes are masked @@ -1528,8 +1524,8 @@ void GSRendererHW::Draw() TextureMinMaxResult tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear()); - m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, tmm.coverage) : - m_tc->LookupSource(TEX0, env.TEXA, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || + m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage) : + m_tc->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic || GSConfig.TriFilter == TriFiltering::Forced) ? 
&hash_lod_range : nullptr); // Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target. @@ -1642,7 +1638,7 @@ void GSRendererHW::Draw() for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++) { - const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer); + const GIFRegTEX0 MIP_TEX0(GetTex0Layer(layer)); m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM); @@ -3105,6 +3101,26 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& } } +__ri static constexpr bool IsRedundantClamp(u8 clamp, u32 clamp_min, u32 clamp_max, u32 tsize) +{ + // Don't shader sample when the clamp/repeat is configured to the texture size. + // That way trilinear etc still works. + const u32 textent = (1u << tsize) - 1u; + if (clamp == CLAMP_REGION_CLAMP) + return (clamp_min == 0 && clamp_max == textent); + else if (clamp == CLAMP_REGION_REPEAT) + return (clamp_max == 0 && clamp_min == textent); + else + return false; +} + +__ri static constexpr u8 EffectiveClamp(u8 clamp, bool has_region) +{ + // When we have extracted the region in the texture, we can use the hardware sampler for repeat/clamp. + // (weird flip here because clamp/repeat is inverted for region vs non-region). + return (clamp >= CLAMP_REGION_CLAMP && has_region) ? (clamp ^ 3) : clamp; +} + void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) { // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. @@ -3112,9 +3128,16 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? 
GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; - const u8 wms = m_context->CLAMP.WMS; - const u8 wmt = m_context->CLAMP.WMT; + static constexpr const char* clamp_modes[] = { "REPEAT", "CLAMP", "REGION_CLAMP", "REGION_REPEAT" }; + const bool redundant_wms = IsRedundantClamp(m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, tex->m_TEX0.TW); + const bool redundant_wmt = IsRedundantClamp(m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, tex->m_TEX0.TH); + const u8 wms = EffectiveClamp(m_context->CLAMP.WMS, tex->m_region.HasX() || redundant_wms); // A redundant region spans the whole texture, so the hardware sampler can handle it. + const u8 wmt = EffectiveClamp(m_context->CLAMP.WMT, tex->m_region.HasY() || redundant_wmt); const bool complex_wms_wmt = !!((wms | wmt) & 2); + GL_CACHE("WMS: %s [%s%s] WMT: %s [%s%s] Complex: %d MINU: %d MINV: %d MAXU: %d MAXV: %d", + clamp_modes[m_context->CLAMP.WMS], redundant_wms ? "redundant," : "", clamp_modes[wms], + clamp_modes[m_context->CLAMP.WMT], redundant_wmt ? "redundant," : "", clamp_modes[wmt], + complex_wms_wmt, m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); const bool need_mipmap = IsMipMapDraw(); const bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt || psm.depth; @@ -3290,14 +3313,38 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) const GSVector4 st_scale = WH.zwzw() / GSVector4(w, h).xyxy(); m_conf.cb_ps.STScale = GSVector2(st_scale.x, st_scale.y); + if (tex->m_region.HasX()) + { + m_conf.cb_ps.STRange.x = static_cast<float>(tex->m_region.GetMinX()) / static_cast<float>(miptw); + m_conf.cb_ps.STRange.z = static_cast<float>(miptw) / static_cast<float>(tex->m_region.GetWidth()); + m_conf.ps.adjs = 1; + } + if (tex->m_region.HasY()) + { + m_conf.cb_ps.STRange.y = static_cast<float>(tex->m_region.GetMinY()) / static_cast<float>(mipth); + m_conf.cb_ps.STRange.w = static_cast<float>(mipth) / static_cast<float>(tex->m_region.GetHeight()); + m_conf.ps.adjt = 1; + } + m_conf.ps.fst = !!PRIM->FST; m_conf.cb_ps.WH = WH; m_conf.cb_ps.HalfTexel = 
GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); if (complex_wms_wmt) { - m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);; - m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy(); + const GSVector4i clamp(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); + const GSVector4 region_repeat(GSVector4::cast(clamp)); + const GSVector4 region_clamp(GSVector4(clamp) / WH.xyxy()); + if (wms >= CLAMP_REGION_CLAMP) + { + m_conf.cb_ps.MinMax.x = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.x : region_repeat.x; + m_conf.cb_ps.MinMax.z = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.z : region_repeat.z; + } + if (wmt >= CLAMP_REGION_CLAMP) + { + m_conf.cb_ps.MinMax.y = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.y : region_repeat.y; + m_conf.cb_ps.MinMax.w = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.w : region_repeat.w; + } } else if (trilinear_manual) { @@ -3318,18 +3365,6 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) m_conf.cb_ps.TCOffsetHack = GSVector2(tc_oh_ts.z, tc_oh_ts.w); m_conf.cb_vs.texture_scale = GSVector2(tc_oh_ts.x, tc_oh_ts.y); - // Must be done after all coordinates math - if (m_context->HasFixedTEX0() && !PRIM->FST) - { - m_conf.ps.invalid_tex0 = 1; - // Use invalid size to denormalize ST coordinate - m_conf.cb_ps.WH.x = static_cast(1 << m_context->stack.TEX0.TW); - m_conf.cb_ps.WH.y = static_cast(1 << m_context->stack.TEX0.TH); - - // We can't handle m_target with invalid_tex0 atm due to upscaling - ASSERT(!tex->m_target); - } - // Only enable clamping in CLAMP mode. 
REGION_CLAMP will be done manually in the shader m_conf.sampler.tau = (wms != CLAMP_CLAMP); m_conf.sampler.tav = (wmt != CLAMP_CLAMP); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index a6c4c33040..56d2b564f0 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -119,7 +119,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm target->m_dirty.push_back(GSDirtyRect(rect, psm, bw)); } -GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette) +GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette) { if (GSConfig.UserHacks_DisableDepthSupport) { @@ -177,7 +177,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 TEX0.TBP0, psm_str(psm)); // Create a shared texture source - src = new Source(TEX0, TEXA, true); + src = new Source(TEX0, TEXA); src->m_texture = dst->m_texture; src->m_shared_texture = true; src->m_target = true; // So renderer can check if a conversion is required @@ -201,7 +201,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 else if (g_gs_renderer->m_game.title == CRC::SVCChaos || g_gs_renderer->m_game.title == CRC::KOF2002) { // SVCChaos black screen & KOF2002 blue screen on main menu, regardless of depth enabled or disabled. 
- return LookupSource(TEX0, TEXA, r, nullptr); + return LookupSource(TEX0, TEXA, CLAMP, r, nullptr); } else { @@ -227,24 +227,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0 return src; } -GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod) +__ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, + const GSLocalMemory::psm_t& psm_s, const u32* clut, const GSTexture* gpu_clut, const GSVector2i& compare_lod, + const GSTextureCache::SourceRegion& region, FastList& map) { - GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP); - - const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; - //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm; - - const u32* const clut = g_gs_renderer->m_mem.m_clut; - GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr; - - Source* src = NULL; - - auto& m = m_src.m_map[TEX0.TBP0 >> 5]; - - const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0)); - for (auto i = m.begin(); i != m.end(); ++i) + for (auto i = map.begin(); i != map.end(); ++i) { - Source* s = *i; + GSTextureCache::Source* s = *i; if (((TEX0.U32[0] ^ s->m_TEX0.U32[0]) | ((TEX0.U32[1] ^ s->m_TEX0.U32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH continue; @@ -272,20 +261,92 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con continue; } + if (s->m_region.bits != 0 && s->m_region.bits != region.bits) + continue; + // Same base mip texture, but we need to check that MXL was the same as well. // When mipmapping is off, this will be 0,0 vs 0,0. 
if (s->m_lod != compare_lod) continue; } - m.MoveFront(i.Index()); - - src = s; - - break; + map.MoveFront(i.Index()); + return s; } - Target* dst = NULL; + return nullptr; +} + +GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod) +{ + GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH); + + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm; + + const u32* const clut = g_gs_renderer->m_mem.m_clut; + GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr; + + SourceRegion region = {}; + if (CLAMP.WMS == CLAMP_REGION_CLAMP && CLAMP.MAXU >= CLAMP.MINU) + { + // Another Lupin case here, it uses region clamp with UV (not ST), puts a clamp region further + // into the texture, but a smaller TW/TH. Catch this by looking for a clamp range above TW. rw is the inclusive region width (MAXU - MINU + 1). + const u32 rw = CLAMP.MAXU - CLAMP.MINU + 1; + if (rw < (1u << TEX0.TW) || CLAMP.MAXU >= (1u << TEX0.TW)) + { + region.SetX(CLAMP.MINU, CLAMP.MAXU + 1); + GL_CACHE("TC: Region clamp optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); + } + } + else if (CLAMP.WMS == CLAMP_REGION_REPEAT && CLAMP.MINU != 0) + { + // Lupin the 3rd is really evil, it sets TW/TH to the texture size, but then uses region repeat + // to offset the actual texture data to elsewhere. So, we'll just force any cases like this down + // the region texture path. 
+ const u32 rw = ((CLAMP.MINU | CLAMP.MAXU) - CLAMP.MAXU) + 1; + if (rw < (1u << TEX0.TW) || CLAMP.MAXU != 0) + { + region.SetX(CLAMP.MAXU, (CLAMP.MINU | CLAMP.MAXU) + 1); + GL_CACHE("TC: Region repeat optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth()); + } + } + if (CLAMP.WMT == CLAMP_REGION_CLAMP && CLAMP.MAXV >= CLAMP.MINV) + { + const u32 rh = CLAMP.MAXV - CLAMP.MINV + 1; + if (rh < (1u << TEX0.TH) || CLAMP.MAXV >= (1u << TEX0.TH)) + { + region.SetY(CLAMP.MINV, CLAMP.MAXV + 1); + GL_CACHE("TC: Region clamp optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight()); + } + } + else if (CLAMP.WMT == CLAMP_REGION_REPEAT && CLAMP.MINV != 0) + { + const u32 rh = ((CLAMP.MINV | CLAMP.MAXV) - CLAMP.MAXV) + 1; + if (rh < (1u << TEX0.TH) || CLAMP.MAXV != 0) + { + region.SetY(CLAMP.MAXV, (CLAMP.MINV | CLAMP.MAXV) + 1); + GL_CACHE("TC: Region repeat optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight()); + } + } + + const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0)); + Source* src = nullptr; + + // Region textures might be placed in a different page, so check that first. 
+ const u32 lookup_page = TEX0.TBP0 >> 5; + if (region.GetMinX() != 0 || region.GetMinY() != 0) + { + const GSOffset offset(psm_s.info, TEX0.TBP0, TEX0.TBW, TEX0.PSM); + const u32 region_page = offset.bn(region.GetMinX(), region.GetMinY()) >> 5; + if (lookup_page != region_page) + src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[region_page]); + } + if (!src) + src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[lookup_page]); + + + Target* dst = nullptr; bool half_right = false; int x_offset = 0; int y_offset = 0; @@ -293,7 +354,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con #ifdef DISABLE_HW_TEXTURE_CACHE if (0) #else - if (src == NULL) + if (!src) #endif { const u32 bp = TEX0.TBP0; @@ -466,11 +527,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con GIFRegTEX0 depth_TEX0; depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u); depth_TEX0.U32[1] = TEX0.U32[1]; - return LookupDepthSource(depth_TEX0, TEXA, r); + return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r); } else { - return LookupDepthSource(TEX0, TEXA, r, true); + return LookupDepthSource(TEX0, TEXA, CLAMP, r, true); } } } @@ -496,7 +557,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); } #endif - src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut); + src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region); } else { @@ -1893,13 +1954,13 @@ void GSTextureCache::IncAge() } //Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work. 
-GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut) +GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; - Source* src = new Source(TEX0, TEXA, false); + Source* src = new Source(TEX0, TEXA); - const int tw = 1 << TEX0.TW; - const int th = 1 << TEX0.TH; + int tw = 1 << TEX0.TW; + int th = 1 << TEX0.TH; //int tp = TEX0.TBW << 6; int tlevels = 1; if (lod) @@ -2211,8 +2272,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0) || gpu_clut; const u32* clut = (psm.pal > 0) ? static_cast(g_gs_renderer->m_mem.m_clut) : nullptr; + // adjust texture size to fit + src->m_region = region; + tw = region.HasX() ? region.GetWidth() : tw; + th = region.HasY() ? region.GetHeight() : th; + // try the hash cache - if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr) + if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr) { src->m_texture = src->m_from_hash_cache->texture; if (gpu_clut) @@ -2245,6 +2311,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con ASSERT(src->m_from_target == (dst ? &dst->m_texture : nullptr)); ASSERT(src->m_texture->GetScale() == ((!dst || TEX0.PSM == PSM_PSMT8) ? 
GSVector2(1, 1) : dst->m_texture->GetScale())); + src->SetPages(); + m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex); return src; @@ -2253,7 +2321,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // This really needs a better home... extern bool FMVstarted; -GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod) +GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region) { // don't bother hashing if we're not dumping or replacing. const bool dump = GSConfig.DumpReplaceableTextures && (!FMVstarted || GSConfig.DumpTexturesWithFMVActive) && @@ -2265,13 +2333,13 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 // need the hash either for replacing, dumping or caching. // if dumping/replacing is on, we compute the clut hash regardless, since replacements aren't indexed - HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod)}; + HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod, region)}; // handle dumping first, this is mostly isolated. 
if (dump) { // dump base level - GSTextureReplacements::DumpTexture(key, TEX0, TEXA, g_gs_renderer->m_mem, 0); + GSTextureReplacements::DumpTexture(key, TEX0, TEXA, region, g_gs_renderer->m_mem, 0); // and the mips if (lod && GSConfig.DumpReplaceableMipmaps) @@ -2281,7 +2349,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 for (int mip = 1; mip < nmips; mip++) { const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)}; - GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, g_gs_renderer->m_mem, mip); + GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, region, g_gs_renderer->m_mem, mip); } } } @@ -2355,8 +2423,8 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 return nullptr; // expand/upload texture - const int tw = 1 << TEX0.TW; - const int th = 1 << TEX0.TH; + const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); + const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); const int tlevels = lod ? ((GSConfig.HWMipmap != HWMipmapLevel::Full) ? -1 : (lod->y - lod->x + 1)) : 1; GSTexture* tex = g_gs_device->CreateTexture(tw, th, tlevels, paltex ? 
GSTexture::Format::UNorm8 : GSTexture::Format::Color); if (!tex) @@ -2366,7 +2434,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 } // upload base level - PreloadTexture(TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, 0); + PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0); // upload mips if present if (lod) @@ -2376,7 +2444,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 for (int mip = 1; mip < nmips; mip++) { const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)}; - PreloadTexture(MIP_TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, mip); + PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip); } } @@ -2649,12 +2717,13 @@ bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i // GSTextureCache::Source -GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container) +GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) : m_palette_obj(nullptr) , m_palette(nullptr) , m_valid_rect(0, 0) , m_lod(0, 0) , m_target(false) + , m_repeating(false) , m_p2t(NULL) , m_from_target(NULL) , m_from_target_TEX0(TEX0) @@ -2662,32 +2731,8 @@ GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, b m_TEX0 = TEX0; m_TEXA = TEXA; - if (dummy_container) - { - // Dummy container only contain a m_texture that is a pointer to another source. 
- - m_write.rect = NULL; - m_write.count = 0; - - m_repeating = false; - } - else - { - memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0)); - memset(m_layer_hash, 0, sizeof(m_layer_hash)); - - m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32); - m_write.count = 0; - - m_repeating = m_TEX0.IsRepeating(); - - if (m_repeating && !CanPreload()) - { - m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0); - } - - m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH)); - } + memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0)); + memset(m_layer_hash, 0, sizeof(m_layer_hash)); } GSTextureCache::Source::~Source() @@ -2703,6 +2748,23 @@ GSTextureCache::Source::~Source() } } +void GSTextureCache::Source::SetPages() +{ + const int tw = 1 << m_TEX0.TW; + const int th = 1 << m_TEX0.TH; + + m_repeating = !m_from_hash_cache && m_TEX0.IsRepeating() && !m_region.IsFixedTEX0(tw, th); + + if (m_repeating && !CanPreload()) + { + // TODO: wrong for lupin/invalid tex0 + m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0); + } + + const GSVector4i rect(m_region.GetRect(tw, th)); + m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(rect); +} + void GSTextureCache::Source::Update(const GSVector4i& rect, int level) { Surface::UpdateAge(); @@ -2719,9 +2781,17 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int level) const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; const int tw = 1 << m_TEX0.TW; const int th = 1 << m_TEX0.TH; - const GSVector4i r = rect.ralign(bs); - if (r.eq(GSVector4i(0, 0, tw, th))) + GSVector4i r(rect); + const GSVector4i region_rect(m_region.GetRect(tw, th)); + + // Offset the pages we use by the clamp region. + if (m_region.HasEither()) + r = (r + m_region.GetOffset(tw, th)).rintersect(region_rect); + + r = r.ralign(bs); + + if (region_rect.eq(m_region.HasEither() ? 
r.rintersect(region_rect) : r)) m_complete_layers |= (1u << level); const GSOffset& off = g_gs_renderer->m_context->offset.tex; @@ -2818,6 +2888,9 @@ void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4 void GSTextureCache::Source::Write(const GSVector4i& r, int layer) { + if (!m_write.rect) + m_write.rect = static_cast(_aligned_malloc(3 * sizeof(GSVector4i), 32)); + m_write.rect[m_write.count++] = r; while (m_write.count >= 2) @@ -2857,11 +2930,12 @@ void GSTextureCache::Source::Flush(u32 count, int layer) // However the function is never called for these cases. This is just for information // should someone wish to use this function for these cases later. const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; + const SourceRegion region((layer == 0) ? m_region : m_region.AdjustForMipmap(layer)); - const int tw = 1 << m_TEX0.TW; - const int th = 1 << m_TEX0.TH; - - const GSVector4i tr(0, 0, tw, th); + // For the invalid tex0 case, the region might be larger than TEX0.TW/TH. 
+ const int tw = std::max(region.GetWidth(), 1u << m_TEX0.TW); + const int th = std::max(region.GetHeight(), 1u << m_TEX0.TH); + const GSVector4i tex_r(region.GetRect(tw, th)); int pitch = std::max(tw, psm.bs.x) * sizeof(u32); @@ -2877,35 +2951,33 @@ void GSTextureCache::Source::Flush(u32 count, int layer) rtx = psm.rtxP; } - u8* buff = s_unswizzle_buffer; - for (u32 i = 0; i < count; i++) { - const GSVector4i r = m_write.rect[i]; + const GSVector4i r(m_write.rect[i]); - if ((r > tr).mask() & 0xff00) - { - rtx(mem, off, r, buff, pitch, m_TEXA); - - m_texture->Update(r.rintersect(tr), buff, pitch, layer); - } - else + // if update rect lies to the left/above of the region rectangle, or extends past the texture bounds, we can't use a direct map + if (((r > tex_r).mask() & 0xff00) == 0 && ((tex_r > r).mask() & 0x00ff) == 0) { GSTexture::GSMap m; - - if (m_texture->Map(m, &r, layer)) + const GSVector4i map_r(r - tex_r.xyxy()); + if (m_texture->Map(m, &map_r, layer)) { rtx(mem, off, r, m.bits, m.pitch, m_TEXA); - m_texture->Unmap(); - } - else - { - rtx(mem, off, r, buff, pitch, m_TEXA); - - m_texture->Update(r, buff, pitch, layer); + continue; } } + + const GSVector4i rint(r.rintersect(tex_r)); + if (rint.width() == 0 || rint.height() == 0) + continue; + + rtx(mem, off, r, s_unswizzle_buffer, pitch, m_TEXA); + + // need to offset if we're a region texture + const u8* src = s_unswizzle_buffer + (pitch * static_cast(std::max(tex_r.top - r.top, 0))) + + (static_cast(std::max(tex_r.left - r.left, 0)) << (m_palette ? 0 : 2)); + m_texture->Update(rint - tex_r.xyxy(), src, pitch, layer); } if (count < m_write.count) @@ -2920,7 +2992,7 @@ void GSTextureCache::Source::Flush(u32 count, int layer) void GSTextureCache::Source::PreloadLevel(int level) { // m_TEX0 is adjusted for mips (messy, should be changed). 
- const HashType hash = HashTexture(m_TEX0, m_TEXA); + const HashType hash = HashTexture(m_TEX0, m_TEXA, m_region); // Layer is complete again, regardless of whether the hash matches or not (and we reupload). const u8 layer_bit = static_cast(1) << level; @@ -2934,7 +3006,7 @@ void GSTextureCache::Source::PreloadLevel(int level) m_layer_hash[level] = hash; // And upload the texture. - PreloadTexture(m_TEX0, m_TEXA, g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level); + PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level); } bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key) @@ -3674,6 +3746,47 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur return true; } +bool GSTextureCache::SourceRegion::IsFixedTEX0(int tw, int th) const +{ + return (GetMinX() >= static_cast(tw) || GetMinY() >= static_cast(th)); +} + +GSVector4i GSTextureCache::SourceRegion::GetRect(int tw, int th) const +{ + return GSVector4i(HasX() ? GetMinX() : 0, HasY() ? GetMinY() : 0, HasX() ? GetMaxX() : tw, HasY() ? GetMaxY() : th); +} + +GSVector4i GSTextureCache::SourceRegion::GetOffset(int tw, int th) const +{ + const int xoffs = (GetMaxX() > static_cast(tw)) ? static_cast(GetMinX()) : 0; + const int yoffs = (GetMaxY() > static_cast(th)) ? 
static_cast(GetMinY()) : 0; + return GSVector4i(xoffs, yoffs, xoffs, yoffs); +} + +GSTextureCache::SourceRegion GSTextureCache::SourceRegion::AdjustForMipmap(u32 level) const +{ + SourceRegion ret = {}; + if (HasX()) + { + const u32 new_minx = GetMinX() >> level; + const u32 new_maxx = ((GetMaxX() - 1) >> level) + 1; + ret.SetX(new_minx, new_maxx); + } + if (HasY()) + { + const u32 new_miny = GetMinY() >> level; + const u32 new_maxy = ((GetMaxY() - 1) >> level) + 1; + ret.SetY(new_miny, new_maxy); + } + return ret; +} + +void GSTextureCache::SourceRegion::AdjustTEX0(GIFRegTEX0* TEX0) const +{ + const GSOffset offset(GSLocalMemory::m_psm[TEX0->PSM].info, TEX0->TBP0, TEX0->TBW, TEX0->PSM); + TEX0->TBP0 += offset.bn(GetMinX(), GetMinY()); +} + using BlockHashState = XXH3_state_t; __fi static void BlockHashReset(BlockHashState& st) @@ -3696,16 +3809,16 @@ __fi static GSTextureCache::HashType FinishBlockHash(BlockHashState& st) return GSXXH3_64bits_digest(&st); } -static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, BlockHashState& hash_st, u8* temp) +static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, BlockHashState& hash_st, u8* temp) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSVector2i& bs = psm.bs; - const int tw = 1 << TEX0.TW; - const int th = 1 << TEX0.TH; + const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); + const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); // From GSLocalMemory foreachBlock(), used for reading textures. // We want to hash the exact same blocks here. 
- const GSVector4i rect(0, 0, tw, th); + const GSVector4i rect(region.GetRect(tw, th)); const GSVector4i block_rect(rect.ralign(bs)); GSLocalMemory& mem = g_gs_renderer->m_mem; const GSOffset off = mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); @@ -3717,7 +3830,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo // the texture data with other textures/framebuffers/etc (which is common). // Even though you might think this would be slower than just hashing for the hash // cache, it actually ends up faster (unswizzling is faster than hashing). - if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu) + if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu || region.GetMaxX() > 0 || region.GetMinY() > 0) { // Expand texture indices. Align to 32 bytes for AVX2. const u32 pitch = Common::AlignUpPow2(static_cast(block_rect.z), 32); @@ -3728,7 +3841,8 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo rtx(mem, off, block_rect, temp, pitch, TEXA); // Hash the expanded texture. 
- u8* ptr = temp; + u8* ptr = temp + (pitch * static_cast(rect.top - block_rect.top)) + + static_cast(rect.left - block_rect.left); if (pitch == row_size) { BlockHashAccumulate(hash_st, ptr, pitch * static_cast(th)); @@ -3741,8 +3855,6 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo } else { - BlockHashReset(hash_st); - GSOffset::BNHelper bn = off.bnMulti(block_rect.left, block_rect.top); const int right = block_rect.right >> off.blockShiftX(); const int bottom = block_rect.bottom >> off.blockShiftY(); @@ -3758,27 +3870,27 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo } } -GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) +GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region) { BlockHashState hash_st; BlockHashReset(hash_st); - HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer); + HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer); return FinishBlockHash(hash_st); } -void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level) +void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level) { // m_TEX0 is adjusted for mips (messy, should be changed). const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSVector2i& bs = psm.bs; - const int tw = 1 << TEX0.TW; - const int th = 1 << TEX0.TH; + const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); + const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); // Expand texture/apply palette. 
- const GSVector4i rect(0, 0, tw, th); + const GSVector4i rect(region.GetRect(tw, th)); const GSVector4i block_rect(rect.ralign(bs)); const GSOffset off(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM)); - const int read_width = std::max(tw, psm.bs.x); + const int read_width = block_rect.width(); u32 pitch = static_cast(read_width) * sizeof(u32); GSLocalMemory::readTexture rtx = psm.rtx; if (paltex) @@ -3788,8 +3900,9 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE } // If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer. + const GSVector4i unoffset_rect(0, 0, tw, th); GSTexture::GSMap map; - if (rect.eq(block_rect) && tex->Map(map, &rect, level)) + if (rect.eq(block_rect) && tex->Map(map, &unoffset_rect, level)) { rtx(mem, off, block_rect, map.bits, map.pitch, TEXA); tex->Unmap(); @@ -3801,7 +3914,10 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE u8* buff = s_unswizzle_buffer; rtx(mem, off, block_rect, buff, pitch, TEXA); - tex->Update(rect, buff, pitch, level); + + const u8* ptr = buff + (pitch * static_cast(rect.top - block_rect.top)) + + (static_cast(rect.left - block_rect.left) << (paltex ? 0 : 2)); + tex->Update(unoffset_rect, ptr, pitch, level); } } @@ -3813,7 +3929,7 @@ GSTextureCache::HashCacheKey::HashCacheKey() TEXA.U64 = 0; } -GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod) +GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; @@ -3821,12 +3937,13 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE ret.TEX0.U64 = TEX0.U64 & 0x00000007FFF00000ULL; ret.TEXA.U64 = (psm.pal == 0 && psm.fmt > 0) ? 
(TEXA.U64 & 0x000000FF000080FFULL) : 0; ret.CLUTHash = clut ? GSTextureCache::PaletteKeyHash{}({clut, psm.pal}) : 0; + ret.region = region; BlockHashState hash_st; BlockHashReset(hash_st); // base level is always hashed - HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer); + HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer); if (lod) { @@ -3836,7 +3953,7 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE for (int i = 1; i < nmips; i++) { const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + i)}; - HashTextureLevel(MIP_TEX0, TEXA, hash_st, s_unswizzle_buffer); + HashTextureLevel(MIP_TEX0, TEXA, region.AdjustForMipmap(i), hash_st, s_unswizzle_buffer); } } @@ -3860,6 +3977,6 @@ void GSTextureCache::HashCacheKey::RemoveCLUTHash() u64 GSTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& key) const { std::size_t h = 0; - HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64); + HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64, key.region.bits); return h; } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index 61ea6dab88..0bb73ba770 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -44,6 +44,42 @@ public: return valid && overlap; } + struct SourceRegion + { + u64 bits; + + bool HasX() const { return static_cast(bits) != 0; } + bool HasY() const { return static_cast(bits >> 32) != 0; } + bool HasEither() const { return (bits != 0); } + + void SetX(u32 min, u32 max) { bits |= (min | (max << 16)); } + void SetY(u32 min, u32 max) { bits |= ((static_cast(min) << 32) | (static_cast(max) << 48)); } + + u32 GetMinX() const { return static_cast(bits) & 0xFFFFu; } + u32 GetMaxX() const { return static_cast(bits >> 16) & 0xFFFFu; } + u32 GetMinY() const { return static_cast(bits >> 32) & 0xFFFFu; } + u32 GetMaxY() const { return static_cast(bits >> 48); } + + u32 GetWidth() 
const { return (GetMaxX() - GetMinX()); } + u32 GetHeight() const { return (GetMaxY() - GetMinY()); } + + /// Returns true if the area of the region exceeds the TW/TH size (i.e. "fixed tex0"). + bool IsFixedTEX0(int tw, int th) const; + + /// Returns the rectangle relative to the texture base pointer that the region occupies. + GSVector4i GetRect(int tw, int th) const; + + /// When TW/TH is less than the extents covered by the region ("fixed tex0"), returns the offset + /// which should be applied to any coordinates to relocate them to the actual region. + GSVector4i GetOffset(int tw, int th) const; + + /// Reduces the range of texels relative to the specified mipmap level. + SourceRegion AdjustForMipmap(u32 level) const; + + /// Adjusts the texture base pointer and block width relative to the region. + void AdjustTEX0(GIFRegTEX0* TEX0) const; + }; + using HashType = u64; struct HashCacheKey @@ -51,10 +87,11 @@ public: HashType TEX0Hash, CLUTHash; GIFRegTEX0 TEX0; GIFRegTEXA TEXA; + SourceRegion region; HashCacheKey(); - static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod); + static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region); HashCacheKey WithRemovedCLUTHash() const; void RemoveCLUTHash(); @@ -148,7 +185,7 @@ public: { GSVector4i* rect; u32 count; - } m_write; + } m_write = {}; void PreloadLevel(int level); @@ -161,6 +198,7 @@ public: GSTexture* m_palette; GSVector4i m_valid_rect; GSVector2i m_lod; + SourceRegion m_region = {}; u8 m_valid_hashes = 0; u8 m_complete_layers = 0; bool m_target; @@ -178,11 +216,13 @@ public: GSOffset::PageLooper m_pages; public: - Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container = false); + Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); virtual ~Source(); __fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); } + void 
SetPages(); + void Update(const GSVector4i& rect, int layer = 0); void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0); @@ -322,7 +362,7 @@ protected: std::unique_ptr m_uint16_download_texture; std::unique_ptr m_uint32_download_texture; - Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut); + Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region); Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear); /// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset @@ -332,10 +372,10 @@ protected: /// Resizes the download texture if needed. bool PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr* tex); - HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod); + HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region); - static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level); - static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA); + static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level); + static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region); // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; // TODO: virtual void Write(Target* t, const GSVector4i& r) = 0; @@ -358,8 +398,8 @@ public: 
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size); - Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod); - Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false); + Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod); + Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette = false); Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData); Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h); diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp index 2cee44524f..30abd4030f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp @@ -42,6 +42,8 @@ // this is a #define instead of a variable to avoid warnings from non-literal format strings #define TEXTURE_FILENAME_FORMAT_STRING "%" PRIx64 "-%08x" #define TEXTURE_FILENAME_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-%08x" +#define TEXTURE_FILENAME_REGION_FORMAT_STRING "%" PRIx64 "-r%" PRIx64 "-" "-%08x" +#define TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-r%" PRIx64 "-%08x" #define TEXTURE_REPLACEMENT_SUBDIRECTORY_NAME "replacements" #define TEXTURE_DUMP_SUBDIRECTORY_NAME "dumps" @@ -51,6 +53,7 @@ namespace { u64 TEX0Hash; u64 CLUTHash; + GSTextureCache::SourceRegion region; union { @@ -68,9 +71,10 @@ namespace }; u32 miplevel; - __fi u32 Width() const { return (1u << TEX0_TW); } - __fi u32 Height() const { return (1u 
<< TEX0_TH); } + __fi u32 Width() const { return (region.HasX() ? region.GetWidth() : (1u << TEX0_TW)); } + /* Height is the region's Y extent when a Y region is set, otherwise 1 << TH. */ __fi u32 Height() const { return (region.HasY() ? region.GetHeight() : (1u << TEX0_TH)); } __fi bool HasPalette() const { return (GSLocalMemory::m_psm[TEX0_PSM].pal > 0); } + __fi bool HasRegion() const { return region.HasEither(); } __fi GSVector2 ReplacementScale(const GSTextureReplacements::ReplacementTexture& rtex) const { @@ -79,14 +83,27 @@ namespace __fi GSVector2 ReplacementScale(u32 rwidth, u32 rheight) const { - return GSVector2(static_cast(rwidth) / static_cast(Width()), static_cast(rheight) / static_cast(Height())); + return GSVector2(static_cast(rwidth) / static_cast(Width()), + static_cast(rheight) / static_cast(Height())); } - __fi bool operator==(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) == std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); } - __fi bool operator!=(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) != std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); } - __fi bool operator<(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) < std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); } + __fi bool operator==(const TextureName& rhs) const + { /* Each comparison ties this object's fields against the matching fields of rhs, region included. */ + return std::tie(TEX0Hash, CLUTHash, region.bits, bits) == + std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits); + } + __fi bool operator!=(const TextureName& rhs) const + { + return std::tie(TEX0Hash, CLUTHash, region.bits, bits) != + std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits); + } + __fi bool operator<(const TextureName& rhs) const + { + return std::tie(TEX0Hash, CLUTHash, region.bits, bits) < + std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits); + } }; - static_assert(sizeof(TextureName) == 24, "ReplacementTextureName is expected size"); + static_assert(sizeof(TextureName) == 32, "ReplacementTextureName is expected size"); } // namespace namespace std @@ -97,7 +114,7 @@ namespace std 
std::size_t operator()(const TextureName& val) const { std::size_t h = 0; - HashCombine(h, val.TEX0Hash, val.CLUTHash, val.bits, val.miplevel); + HashCombine(h, val.TEX0Hash, val.CLUTHash, val.region.bits, val.bits, val.miplevel); return h; } }; @@ -169,6 +186,7 @@ TextureName GSTextureReplacements::CreateTextureName(const GSTextureCache::HashC name.TEX0Hash = hash.TEX0Hash; name.CLUTHash = name.HasPalette() ? hash.CLUTHash : 0; name.miplevel = miplevel; + name.region = hash.region; return name; } @@ -184,6 +202,7 @@ GSTextureCache::HashCacheKey GSTextureReplacements::HashCacheKeyFromTextureName( key.TEXA.TA1 = tn.TEXA_TA1; key.TEX0Hash = tn.TEX0Hash; key.CLUTHash = tn.HasPalette() ? tn.CLUTHash : 0; + key.region = tn.region; return key; } @@ -192,15 +211,38 @@ std::optional GSTextureReplacements::ParseReplacementName(const std TextureName ret; ret.miplevel = 0; - // TODO(Stenzek): Make this better. char extension_dot; - if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits, &extension_dot) != 4 || extension_dot != '.') + if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, + &ret.region.bits, &ret.bits, &extension_dot) == 5 && + extension_dot == '.') { - if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.bits, &extension_dot) != 3 || extension_dot != '.') - return std::nullopt; + return ret; } - return ret; + if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.region.bits, + &ret.bits, &extension_dot) == 4 && + extension_dot == '.') + { + return ret; + } + + ret.region.bits = 0; + + if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits, + &extension_dot) == 4 && + extension_dot == '.') + { + return ret; + } + + if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, 
&ret.bits, &extension_dot) == + 3 && + extension_dot == '.') + { + return ret; + } + + return std::nullopt; } std::string GSTextureReplacements::GetGameTextureDirectory() @@ -229,23 +271,45 @@ std::string GSTextureReplacements::GetDumpFilename(const TextureName& name, u32 const std::string game_subdir(Path::Combine(game_dir, TEXTURE_DUMP_SUBDIRECTORY_NAME)); - if (name.HasPalette()) + std::string filename; + if (name.HasRegion()) { - const std::string filename( - (level > 0) ? - StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.CLUTHash, name.bits, level) : - StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png", name.TEX0Hash, name.CLUTHash, name.bits)); - ret = Path::Combine(game_subdir, filename); + if (name.HasPalette()) + { + filename = (level > 0) ? + StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "-mip%u.png", + name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits, level) : + StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING ".png", + name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits); + } + else + { /* Region without palette: the region bits are written into the filename via the region format string. */ + filename = (level > 0) ? StringUtil::StdStringFromFormat( + TEXTURE_FILENAME_REGION_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.region.bits, name.bits, level) : + StringUtil::StdStringFromFormat( + TEXTURE_FILENAME_REGION_FORMAT_STRING ".png", name.TEX0Hash, name.region.bits, name.bits); + } } else { - const std::string filename( - (level > 0) ? - StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) : - StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits)); - ret = Path::Combine(game_subdir, filename); + if (name.HasPalette()) + { + filename = (level > 0) ? 
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png", + name.TEX0Hash, name.CLUTHash, name.bits, level) : + StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png", + name.TEX0Hash, name.CLUTHash, name.bits); + } + else + { + filename = (level > 0) ? StringUtil::StdStringFromFormat( + TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) : + StringUtil::StdStringFromFormat( + TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits); + } } + ret = Path::Combine(game_subdir, filename); + return ret; } @@ -569,7 +633,8 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures() s_async_loaded_textures.clear(); } -void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level) +void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, + const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level) { // check if it's been dumped or replaced already const TextureName name(CreateTextureName(hash, level)); @@ -589,12 +654,12 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash // compute width/height const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSVector2i& bs = psm.bs; - const int tw = 1 << TEX0.TW; - const int th = 1 << TEX0.TH; - const GSVector4i rect(0, 0, tw, th); + const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); + const int th = region.HasY() ? 
region.GetHeight() : (1 << TEX0.TH); + const GSVector4i rect(region.GetRect(tw, th)); const GSVector4i block_rect(rect.ralign(bs)); - const int read_width = std::max(tw, psm.bs.x); - const int read_height = std::max(th, psm.bs.y); + const int read_width = block_rect.width(); + const int read_height = block_rect.height(); const u32 pitch = static_cast(read_width) * sizeof(u32); // use per-texture buffer so we can compress the texture asynchronously and not block the GS thread @@ -603,8 +668,9 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash psm.rtx(mem, mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM), block_rect, buffer.GetPtr(), pitch, TEXA); // okay, now we can actually dump it - QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer)]() { - if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr(), pitch)) + const u32 buffer_offset = ((rect.top - block_rect.top) * pitch) + ((rect.left - block_rect.left) * sizeof(u32)); + QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer), buffer_offset]() { + if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr() + buffer_offset, pitch)) Console.Error("Failed to dump texture to '%s'.", filename.c_str()); }); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h index cc61be7702..39a8e0c8e7 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h @@ -52,7 +52,8 @@ namespace GSTextureReplacements GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, const GSVector2& scale, bool mipmap); void ProcessAsyncLoadedTextures(); - void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level); + void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, + 
GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level); void ClearDumpedTextureList(); /// Loader will take a filename and interpret the format (e.g. DDS, PNG, etc). diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 85f011b148..a2987d1b72 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1374,6 +1374,8 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC); setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS); setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT); + setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS); + setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT); setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE); setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); @@ -1403,7 +1405,6 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD); setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD); setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER); - setFnConstantB(m_fn_constants, pssel.invalid_tex0, GSMTLConstantIndex_PS_INVALID_TEX0); setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK); auto newps = LoadShader(@"ps_main"); ps = newps; @@ -1594,10 +1595,10 @@ static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, WH) == of static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.x) == offsetof(GSMTLMainPSUniform, ta)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.z) == offsetof(GSMTLMainPSUniform, max_depth)); 
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.w) == offsetof(GSMTLMainPSUniform, alpha_fix)); -static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MskFix) == offsetof(GSMTLMainPSUniform, uv_msk_fix)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FbMask) == offsetof(GSMTLMainPSUniform, fbmask)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, HalfTexel) == offsetof(GSMTLMainPSUniform, half_texel)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MinMax) == offsetof(GSMTLMainPSUniform, uv_min_max)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STRange) == offsetof(GSMTLMainPSUniform, st_range)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle) == offsetof(GSMTLMainPSUniform, channel_shuffle)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack) == offsetof(GSMTLMainPSUniform, tc_offset)); static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale) == offsetof(GSMTLMainPSUniform, st_scale)); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index 6d22d1b2d1..d8c2265c37 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -108,11 +108,15 @@ struct GSMTLMainPSUniform vector_float2 ta; float max_depth; float alpha_fix; - vector_uint4 uv_msk_fix; vector_uint4 fbmask; vector_float4 half_texel; - vector_float4 uv_min_max; + union + { + vector_float4 uv_min_max; + vector_uint4 uv_msk_fix; + }; + vector_float4 st_range; struct { unsigned int blue_mask; @@ -166,6 +170,8 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_TCC, GSMTLConstantIndex_PS_WMS, GSMTLConstantIndex_PS_WMT, + GSMTLConstantIndex_PS_ADJS, + GSMTLConstantIndex_PS_ADJT, GSMTLConstantIndex_PS_LTF, GSMTLConstantIndex_PS_SHUFFLE, GSMTLConstantIndex_PS_READ_BA, @@ -194,6 +200,5 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_AUTOMATIC_LOD, GSMTLConstantIndex_PS_MANUAL_LOD, 
GSMTLConstantIndex_PS_POINT_SAMPLER, - GSMTLConstantIndex_PS_INVALID_TEX0, GSMTLConstantIndex_PS_SCANMSK, }; diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 1daa381f84..4a46cceaa8 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -37,6 +37,8 @@ constant uint PS_TFX [[function_constant(GSMTLConstantIndex_PS_TF constant bool PS_TCC [[function_constant(GSMTLConstantIndex_PS_TCC)]]; constant uint PS_WMS [[function_constant(GSMTLConstantIndex_PS_WMS)]]; constant uint PS_WMT [[function_constant(GSMTLConstantIndex_PS_WMT)]]; +constant bool PS_ADJS [[function_constant(GSMTLConstantIndex_PS_ADJS)]]; +constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_ADJT)]]; constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]]; constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]]; constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]]; @@ -65,7 +67,6 @@ constant bool PS_TEX_IS_FB [[function_constant(GSMTLConstantIndex_PS_TE constant bool PS_AUTOMATIC_LOD [[function_constant(GSMTLConstantIndex_PS_AUTOMATIC_LOD)]]; constant bool PS_MANUAL_LOD [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]]; constant bool PS_POINT_SAMPLER [[function_constant(GSMTLConstantIndex_PS_POINT_SAMPLER)]]; -constant bool PS_INVALID_TEX0 [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]]; constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]]; constant GSMTLExpandType VS_EXPAND_TYPE = static_cast<GSMTLExpandType>(VS_EXPAND_TYPE_RAW); @@ -321,7 +322,21 @@ struct PSMain // As of 2018 this issue is still present. 
uv = (trunc(uv * cb.wh.zw) + 0.5) / cb.wh.zw; } - uv *= cb.st_scale; + if (!PS_ADJS && !PS_ADJT) + { + uv *= cb.st_scale; + } + else + { + if (PS_ADJS) + uv.x = (uv.x - cb.st_range.x) * cb.st_range.z; + else + uv.x = uv.x * cb.st_scale.x; + if (PS_ADJT) + uv.y = (uv.y - cb.st_range.y) * cb.st_range.w; + else + uv.y = uv.y * cb.st_scale.y; + } if (PS_AUTOMATIC_LOD) { @@ -360,7 +375,7 @@ struct PSMain float4 clamp_wrap_uv(float4 uv) { float4 uv_out = uv; - float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy; + float4 tex_size = cb.wh.xyxy; if (PS_WMS == PS_WMT) { @@ -724,12 +739,7 @@ struct PSMain float4 ps_color() { float2 st, st_int; - if (!FST && PS_INVALID_TEX0) - { - st = (in.t.xy * cb.wh.xy) / (in.t.w * cb.wh.zw); - st_int = (in.ti.zw * cb.wh.xy) / (in.t.w * cb.wh.zw); - } - else if (!FST) + if (!FST) { st = in.t.xy / in.t.w; st_int = in.ti.zw / in.t.w; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 7fb4074408..7523c27170 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1029,6 +1029,8 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) std::string macro = fmt::format("#define PS_FST {}\n", sel.fst) + fmt::format("#define PS_WMS {}\n", sel.wms) + fmt::format("#define PS_WMT {}\n", sel.wmt) + + fmt::format("#define PS_ADJS {}\n", sel.adjs) + + fmt::format("#define PS_ADJT {}\n", sel.adjt) + fmt::format("#define PS_AEM_FMT {}\n", sel.aem_fmt) + fmt::format("#define PS_PAL_FMT {}\n", sel.pal_fmt) + fmt::format("#define PS_DFMT {}\n", sel.dfmt) @@ -1037,7 +1039,6 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_URBAN_CHAOS_HLE {}\n", sel.urban_chaos_hle) + fmt::format("#define PS_TALES_OF_ABYSS_HLE {}\n", sel.tales_of_abyss_hle) + fmt::format("#define PS_TEX_IS_FB {}\n", sel.tex_is_fb) - + fmt::format("#define PS_INVALID_TEX0 {}\n", sel.invalid_tex0) + fmt::format("#define PS_AEM {}\n", 
sel.aem) + fmt::format("#define PS_TFX {}\n", sel.tfx) + fmt::format("#define PS_TCC {}\n", sel.tcc) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 1a1ee0155d..0e4bf4f6a5 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -1948,6 +1948,8 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_FST", sel.fst); AddMacro(ss, "PS_WMS", sel.wms); AddMacro(ss, "PS_WMT", sel.wmt); + AddMacro(ss, "PS_ADJS", sel.adjs); + AddMacro(ss, "PS_ADJT", sel.adjt); AddMacro(ss, "PS_AEM_FMT", sel.aem_fmt); AddMacro(ss, "PS_PAL_FMT", sel.pal_fmt); AddMacro(ss, "PS_DFMT", sel.dfmt); @@ -1955,7 +1957,6 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_CHANNEL_FETCH", sel.channel); AddMacro(ss, "PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle); AddMacro(ss, "PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); - AddMacro(ss, "PS_INVALID_TEX0", sel.invalid_tex0); AddMacro(ss, "PS_AEM", sel.aem); AddMacro(ss, "PS_TFX", sel.tfx); AddMacro(ss, "PS_TCC", sel.tcc); diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 602ed0d8bf..af2c5fe8dd 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -15,4 +15,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 11; +static constexpr u32 SHADER_CACHE_VERSION = 12;