mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Optimize TC source size based on CLAMP
This commit is contained in:
parent
b9b47e3ec7
commit
7d08a54ad9
|
@ -21,6 +21,8 @@
|
|||
#define PS_FST 0
|
||||
#define PS_WMS 0
|
||||
#define PS_WMT 0
|
||||
#define PS_ADJS 0
|
||||
#define PS_ADJT 0
|
||||
#define PS_AEM_FMT FMT_32
|
||||
#define PS_AEM 0
|
||||
#define PS_TFX 0
|
||||
|
@ -42,7 +44,6 @@
|
|||
#define PS_CHANNEL_FETCH 0
|
||||
#define PS_TALES_OF_ABYSS_HLE 0
|
||||
#define PS_URBAN_CHAOS_HLE 0
|
||||
#define PS_INVALID_TEX0 0
|
||||
#define PS_SCALE_FACTOR 1.0
|
||||
#define PS_HDR 0
|
||||
#define PS_COLCLIP 0
|
||||
|
@ -158,10 +159,10 @@ cbuffer cb1
|
|||
float2 TA;
|
||||
float MaxDepthPS;
|
||||
float Af;
|
||||
uint4 MskFix;
|
||||
uint4 FbMask;
|
||||
float4 HalfTexel;
|
||||
float4 MinMax;
|
||||
float4 STRange;
|
||||
int4 ChannelShuffle;
|
||||
float2 TC_OffsetHack;
|
||||
float2 STScale;
|
||||
|
@ -183,7 +184,20 @@ float4 sample_c(float2 uv, float uv_w)
|
|||
// As of 2018 this issue is still present.
|
||||
uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
|
||||
}
|
||||
#if !PS_ADJS && !PS_ADJT
|
||||
uv *= STScale;
|
||||
#else
|
||||
#if PS_ADJS
|
||||
uv.x = (uv.x - STRange.x) * STRange.z;
|
||||
#else
|
||||
uv.x = uv.x * STScale.x;
|
||||
#endif
|
||||
#if PS_ADJT
|
||||
uv.y = (uv.y - STRange.y) * STRange.w;
|
||||
#else
|
||||
uv.y = uv.y * STScale.y;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PS_AUTOMATIC_LOD == 1
|
||||
return Texture.Sample(TextureSampler, uv);
|
||||
|
@ -218,12 +232,7 @@ float4 sample_p_norm(float u)
|
|||
|
||||
float4 clamp_wrap_uv(float4 uv)
|
||||
{
|
||||
float4 tex_size;
|
||||
|
||||
if (PS_INVALID_TEX0 == 1)
|
||||
tex_size = WH.zwzw;
|
||||
else
|
||||
tex_size = WH.xyxy;
|
||||
float4 tex_size = WH.xyxy;
|
||||
|
||||
if(PS_WMS == PS_WMT)
|
||||
{
|
||||
|
@ -238,7 +247,7 @@ float4 clamp_wrap_uv(float4 uv)
|
|||
// textures. Fixes Xenosaga's hair issue.
|
||||
uv = frac(uv);
|
||||
#endif
|
||||
uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
|
||||
uv = (float4)(((uint4)(uv * tex_size) & asuint(MinMax.xyxy)) | asuint(MinMax.zwzw)) / tex_size;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -252,7 +261,7 @@ float4 clamp_wrap_uv(float4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.xz = frac(uv.xz);
|
||||
#endif
|
||||
uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
|
||||
uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & asuint(MinMax.xx)) | asuint(MinMax.zz)) / tex_size.xx;
|
||||
}
|
||||
if(PS_WMT == 2)
|
||||
{
|
||||
|
@ -263,7 +272,7 @@ float4 clamp_wrap_uv(float4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.yw = frac(uv.yw);
|
||||
#endif
|
||||
uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
|
||||
uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & asuint(MinMax.yy)) | asuint(MinMax.ww)) / tex_size.yy;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -353,7 +362,7 @@ float4 fetch_c(int2 uv)
|
|||
|
||||
int2 clamp_wrap_uv_depth(int2 uv)
|
||||
{
|
||||
int4 mask = (int4)MskFix << 4;
|
||||
int4 mask = asint(MinMax) << 4;
|
||||
if (PS_WMS == PS_WMT)
|
||||
{
|
||||
if (PS_WMS == 2)
|
||||
|
@ -676,11 +685,7 @@ float4 fog(float4 c, float f)
|
|||
|
||||
float4 ps_color(PS_INPUT input)
|
||||
{
|
||||
#if PS_FST == 0 && PS_INVALID_TEX0 == 1
|
||||
// Re-normalize coordinate from invalid GS to corrected texture size
|
||||
float2 st = (input.t.xy * WH.xy) / (input.t.w * WH.zw);
|
||||
float2 st_int = (input.ti.zw * WH.xy) / (input.t.w * WH.zw);
|
||||
#elif PS_FST == 0
|
||||
#if PS_FST == 0
|
||||
float2 st = input.t.xy / input.t.w;
|
||||
float2 st_int = input.ti.zw / input.t.w;
|
||||
#else
|
||||
|
|
|
@ -75,13 +75,12 @@ layout(std140, binding = 0) uniform cb21
|
|||
float MaxDepthPS;
|
||||
float Af;
|
||||
|
||||
uvec4 MskFix;
|
||||
|
||||
uvec4 FbMask;
|
||||
|
||||
vec4 HalfTexel;
|
||||
|
||||
vec4 MinMax;
|
||||
vec4 STRange;
|
||||
|
||||
ivec4 ChannelShuffle;
|
||||
|
||||
|
@ -92,11 +91,6 @@ layout(std140, binding = 0) uniform cb21
|
|||
};
|
||||
#endif
|
||||
|
||||
//layout(std140, binding = 22) uniform cb22
|
||||
//{
|
||||
// vec4 rt_size;
|
||||
//};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Default Sampler
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -109,7 +109,20 @@ vec4 sample_c(vec2 uv)
|
|||
// As of 2018 this issue is still present.
|
||||
uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
|
||||
#endif
|
||||
uv *= STScale;
|
||||
#if !PS_ADJS && !PS_ADJT
|
||||
uv *= STScale;
|
||||
#else
|
||||
#if PS_ADJS
|
||||
uv.x = (uv.x - STRange.x) * STRange.z;
|
||||
#else
|
||||
uv.x = uv.x * STScale.x;
|
||||
#endif
|
||||
#if PS_ADJT
|
||||
uv.y = (uv.y - STRange.y) * STRange.w;
|
||||
#else
|
||||
uv.y = uv.y * STScale.y;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PS_AUTOMATIC_LOD == 1
|
||||
return texture(TextureSampler, uv);
|
||||
|
@ -146,11 +159,7 @@ vec4 sample_p_norm(float u)
|
|||
vec4 clamp_wrap_uv(vec4 uv)
|
||||
{
|
||||
vec4 uv_out = uv;
|
||||
#if PS_INVALID_TEX0 == 1
|
||||
vec4 tex_size = WH.zwzw;
|
||||
#else
|
||||
vec4 tex_size = WH.xyxy;
|
||||
#endif
|
||||
|
||||
#if PS_WMS == PS_WMT
|
||||
|
||||
|
@ -162,7 +171,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
// textures. Fixes Xenosaga's hair issue.
|
||||
uv = fract(uv);
|
||||
#endif
|
||||
uv_out = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
|
||||
uv_out = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size;
|
||||
#endif
|
||||
|
||||
#else // PS_WMS != PS_WMT
|
||||
|
@ -174,7 +183,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.xz = fract(uv.xz);
|
||||
#endif
|
||||
uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
|
||||
uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx;
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -185,7 +194,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.yw = fract(uv.yw);
|
||||
#endif
|
||||
uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
|
||||
uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -288,7 +297,7 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv)
|
|||
|
||||
// Keep the full precision
|
||||
// This allows multiplying by the ScalingFactor before applying the 1/16 coeff
|
||||
ivec4 mask = ivec4(MskFix) << 4;
|
||||
ivec4 mask = floatBitsToInt(MinMax) << 4;
|
||||
|
||||
#if PS_WMS == PS_WMT
|
||||
|
||||
|
@ -591,11 +600,7 @@ void fog(inout vec4 C, float f)
|
|||
vec4 ps_color()
|
||||
{
|
||||
//FIXME: maybe we can set gl_Position.w = q in VS
|
||||
#if (PS_FST == 0) && (PS_INVALID_TEX0 == 1)
|
||||
// Re-normalize coordinate from invalid GS to corrected texture size
|
||||
vec2 st = (PSin.t_float.xy * WH.xy) / (vec2(PSin.t_float.w) * WH.zw);
|
||||
vec2 st_int = (PSin.t_int.zw * WH.xy) / (vec2(PSin.t_float.w) * WH.zw);
|
||||
#elif (PS_FST == 0)
|
||||
#if (PS_FST == 0)
|
||||
vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);
|
||||
vec2 st_int = PSin.t_int.zw / vec2(PSin.t_float.w);
|
||||
#else
|
||||
|
|
|
@ -312,6 +312,8 @@ void main()
|
|||
#define PS_FST 0
|
||||
#define PS_WMS 0
|
||||
#define PS_WMT 0
|
||||
#define PS_ADJS 0
|
||||
#define PS_ADJT 0
|
||||
#define PS_FMT FMT_32
|
||||
#define PS_AEM 0
|
||||
#define PS_TFX 0
|
||||
|
@ -332,7 +334,6 @@ void main()
|
|||
#define PS_CHANNEL_FETCH 0
|
||||
#define PS_TALES_OF_ABYSS_HLE 0
|
||||
#define PS_URBAN_CHAOS_HLE 0
|
||||
#define PS_INVALID_TEX0 0
|
||||
#define PS_SCALE_FACTOR 1.0
|
||||
#define PS_HDR 0
|
||||
#define PS_COLCLIP 0
|
||||
|
@ -361,10 +362,10 @@ layout(std140, set = 0, binding = 1) uniform cb1
|
|||
vec2 TA;
|
||||
float MaxDepthPS;
|
||||
float Af;
|
||||
uvec4 MskFix;
|
||||
uvec4 FbMask;
|
||||
vec4 HalfTexel;
|
||||
vec4 MinMax;
|
||||
vec4 STRange;
|
||||
ivec4 ChannelShuffle;
|
||||
vec2 TC_OffsetHack;
|
||||
vec2 STScale;
|
||||
|
@ -420,7 +421,20 @@ vec4 sample_c(vec2 uv)
|
|||
// As of 2018 this issue is still present.
|
||||
uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
|
||||
#endif
|
||||
#if !PS_ADJS && !PS_ADJT
|
||||
uv *= STScale;
|
||||
#else
|
||||
#if PS_ADJS
|
||||
uv.x = (uv.x - STRange.x) * STRange.z;
|
||||
#else
|
||||
uv.x = uv.x * STScale.x;
|
||||
#endif
|
||||
#if PS_ADJT
|
||||
uv.y = (uv.y - STRange.y) * STRange.w;
|
||||
#else
|
||||
uv.y = uv.y * STScale.y;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PS_AUTOMATIC_LOD == 1
|
||||
return texture(Texture, uv);
|
||||
|
@ -455,13 +469,7 @@ vec4 sample_p_norm(float u)
|
|||
|
||||
vec4 clamp_wrap_uv(vec4 uv)
|
||||
{
|
||||
vec4 tex_size;
|
||||
|
||||
#if PS_INVALID_TEX0
|
||||
tex_size = WH.zwzw;
|
||||
#else
|
||||
tex_size = WH.xyxy;
|
||||
#endif
|
||||
vec4 tex_size = WH.xyxy;
|
||||
|
||||
#if PS_WMS == PS_WMT
|
||||
{
|
||||
|
@ -476,7 +484,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
// textures. Fixes Xenosaga's hair issue.
|
||||
uv = fract(uv);
|
||||
#endif
|
||||
uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
|
||||
uv = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -491,7 +499,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.xz = fract(uv.xz);
|
||||
#endif
|
||||
uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
|
||||
uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx;
|
||||
}
|
||||
#endif
|
||||
#if PS_WMT == 2
|
||||
|
@ -503,7 +511,7 @@ vec4 clamp_wrap_uv(vec4 uv)
|
|||
#if PS_FST == 0
|
||||
uv.yw = fract(uv.yw);
|
||||
#endif
|
||||
uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
|
||||
uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -590,7 +598,7 @@ vec4 fetch_c(ivec2 uv)
|
|||
|
||||
ivec2 clamp_wrap_uv_depth(ivec2 uv)
|
||||
{
|
||||
ivec4 mask = ivec4(MskFix << 4);
|
||||
ivec4 mask = floatBitsToInt(MinMax) << 4;
|
||||
#if (PS_WMS == PS_WMT)
|
||||
{
|
||||
#if (PS_WMS == 2)
|
||||
|
@ -907,11 +915,7 @@ vec4 fog(vec4 c, float f)
|
|||
|
||||
vec4 ps_color()
|
||||
{
|
||||
#if PS_FST == 0 && PS_INVALID_TEX0 == 1
|
||||
// Re-normalize coordinate from invalid GS to corrected texture size
|
||||
vec2 st = (vsIn.t.xy * WH.xy) / (vsIn.t.w * WH.zw);
|
||||
vec2 st_int = (vsIn.ti.zw * WH.xy) / (vsIn.t.w * WH.zw);
|
||||
#elif PS_FST == 0
|
||||
#if PS_FST == 0
|
||||
vec2 st = vsIn.t.xy / vsIn.t.w;
|
||||
vec2 st_int = vsIn.ti.zw / vsIn.t.w;
|
||||
#else
|
||||
|
|
|
@ -130,7 +130,7 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
|
|||
res.TW = tw > 10 ? 0 : tw;
|
||||
res.TH = th > 10 ? 0 : th;
|
||||
|
||||
if (GSConfig.Renderer == GSRendererType::SW && (TEX0.TW != res.TW || TEX0.TH != res.TH))
|
||||
if (TEX0.TW != res.TW || TEX0.TH != res.TH)
|
||||
{
|
||||
GL_DBG("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)",
|
||||
(int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM,
|
||||
|
@ -142,50 +142,3 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
|
|||
|
||||
return res;
|
||||
}
|
||||
|
||||
void GSDrawingContext::ComputeFixedTEX0(const GSVector4& st)
|
||||
{
|
||||
// It is quite complex to handle rescaling so this function is less stricter than GetSizeFixedTEX0,
|
||||
// therefore we remove the reduce optimization and we don't handle bilinear filtering which might create wrong interpolation at the border.
|
||||
int tw = TEX0.TW;
|
||||
int th = TEX0.TH;
|
||||
|
||||
int wms = (int)CLAMP.WMS;
|
||||
int wmt = (int)CLAMP.WMT;
|
||||
|
||||
int minu = (int)CLAMP.MINU;
|
||||
int minv = (int)CLAMP.MINV;
|
||||
int maxu = (int)CLAMP.MAXU;
|
||||
int maxv = (int)CLAMP.MAXV;
|
||||
|
||||
if (wms != CLAMP_REGION_CLAMP)
|
||||
tw = tw > 10 ? 0 : tw;
|
||||
|
||||
if (wmt != CLAMP_REGION_CLAMP)
|
||||
th = th > 10 ? 0 : th;
|
||||
|
||||
GSVector4i uv = GSVector4i(st.floor().xyzw(st.ceil()));
|
||||
|
||||
uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu);
|
||||
uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv);
|
||||
|
||||
if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
|
||||
tw = extend(uv.x, tw);
|
||||
|
||||
if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
|
||||
th = extend(uv.y, th);
|
||||
|
||||
tw = std::clamp<int>(tw, 0, 10);
|
||||
th = std::clamp<int>(th, 0, 10);
|
||||
|
||||
if ((tw != (int)TEX0.TW) || (th != (int)TEX0.TH))
|
||||
{
|
||||
m_fixed_tex0 = true;
|
||||
TEX0.TW = tw;
|
||||
TEX0.TH = th;
|
||||
|
||||
GL_DBG("FixedTEX0 TW %d=>%d, TH %d=>%d wm %d,%d",
|
||||
(int)stack.TEX0.TW, (int)TEX0.TW, (int)stack.TEX0.TH, (int)TEX0.TH,
|
||||
(int)CLAMP.WMS, (int)CLAMP.WMT);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,12 +69,8 @@ public:
|
|||
GIFRegZBUF ZBUF;
|
||||
} stack;
|
||||
|
||||
bool m_fixed_tex0;
|
||||
|
||||
GSDrawingContext()
|
||||
{
|
||||
m_fixed_tex0 = false;
|
||||
|
||||
memset(&offset, 0, sizeof(offset));
|
||||
|
||||
Reset();
|
||||
|
@ -140,8 +136,6 @@ public:
|
|||
}
|
||||
|
||||
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const;
|
||||
void ComputeFixedTEX0(const GSVector4& st);
|
||||
bool HasFixedTEX0() const { return m_fixed_tex0; }
|
||||
|
||||
// Saving before and restoring after the draw allows the current registers to be corrected/optimized for the current draw
|
||||
// Note: we could avoid the restore part if all renderer code is updated to use a local copy instead
|
||||
|
@ -159,9 +153,6 @@ public:
|
|||
stack.FBA = FBA;
|
||||
stack.FRAME = FRAME;
|
||||
stack.ZBUF = ZBUF;
|
||||
|
||||
// This function is called before the draw so take opportunity to reset m_fixed_tex0
|
||||
m_fixed_tex0 = false;
|
||||
}
|
||||
|
||||
void RestoreReg()
|
||||
|
|
|
@ -823,6 +823,7 @@ union
|
|||
REG_END2
|
||||
__forceinline bool IsRepeating() const
|
||||
{
|
||||
// This is actually "does the texture span more than one page".
|
||||
if (TBW < 2)
|
||||
{
|
||||
if (PSM == PSM_PSMT8)
|
||||
|
|
|
@ -1699,7 +1699,6 @@ inline void GSState::CopyEnv(GSDrawingEnvironment* dest, GSDrawingEnvironment* s
|
|||
{
|
||||
memcpy(dest, src, 88);
|
||||
memcpy(&dest->CTXT[ctx], &src->CTXT[ctx], 96);
|
||||
dest->CTXT[ctx].m_fixed_tex0 = src->CTXT[ctx].m_fixed_tex0;
|
||||
}
|
||||
|
||||
void GSState::Flush(GSFlushReason reason)
|
||||
|
@ -3583,8 +3582,11 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c
|
|||
|
||||
const int minu = (int)CLAMP.MINU;
|
||||
const int minv = (int)CLAMP.MINV;
|
||||
const int maxu = (int)CLAMP.MAXU;
|
||||
const int maxv = (int)CLAMP.MAXV;
|
||||
|
||||
// For the FixedTEX0 case, in hardware, we handle this in the texture cache. Don't OR the bits in here, otherwise
|
||||
// we'll end up with an invalid rectangle, we want the passed-in rectangle to be relative to the normalized size.
|
||||
const int maxu = (wms != CLAMP_REGION_REPEAT || (int)CLAMP.MAXU < w) ? (int)CLAMP.MAXU : 0;
|
||||
const int maxv = (wmt != CLAMP_REGION_REPEAT || (int)CLAMP.MAXV < h) ? (int)CLAMP.MAXV : 0;
|
||||
|
||||
GSVector4i vr = tr;
|
||||
|
||||
|
|
|
@ -309,6 +309,8 @@ struct alignas(16) GSHWDrawConfig
|
|||
u32 tcc : 1;
|
||||
u32 wms : 2;
|
||||
u32 wmt : 2;
|
||||
u32 adjs : 1;
|
||||
u32 adjt : 1;
|
||||
u32 ltf : 1;
|
||||
// Shuffle and fbmask effect
|
||||
u32 shuffle : 1;
|
||||
|
@ -352,7 +354,6 @@ struct alignas(16) GSHWDrawConfig
|
|||
u32 automatic_lod : 1;
|
||||
u32 manual_lod : 1;
|
||||
u32 point_sampler : 1;
|
||||
u32 invalid_tex0 : 1; // Lupin the 3rd
|
||||
|
||||
// Scan mask
|
||||
u32 scanmsk : 2;
|
||||
|
@ -554,11 +555,11 @@ struct alignas(16) GSHWDrawConfig
|
|||
GSVector4 FogColor_AREF;
|
||||
GSVector4 WH;
|
||||
GSVector4 TA_MaxDepth_Af;
|
||||
GSVector4i MskFix;
|
||||
GSVector4i FbMask;
|
||||
|
||||
GSVector4 HalfTexel;
|
||||
GSVector4 MinMax;
|
||||
GSVector4 STRange;
|
||||
GSVector4i ChannelShuffle;
|
||||
GSVector2 TCOffsetHack;
|
||||
GSVector2 STScale;
|
||||
|
|
|
@ -142,6 +142,8 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
sm.AddMacro("PS_FST", sel.fst);
|
||||
sm.AddMacro("PS_WMS", sel.wms);
|
||||
sm.AddMacro("PS_WMT", sel.wmt);
|
||||
sm.AddMacro("PS_ADJS", sel.adjs);
|
||||
sm.AddMacro("PS_ADJT", sel.adjt);
|
||||
sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
|
||||
sm.AddMacro("PS_AEM", sel.aem);
|
||||
sm.AddMacro("PS_TFX", sel.tfx);
|
||||
|
@ -164,7 +166,6 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
sm.AddMacro("PS_DFMT", sel.dfmt);
|
||||
sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
|
||||
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
|
||||
sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0);
|
||||
sm.AddMacro("PS_HDR", sel.hdr);
|
||||
sm.AddMacro("PS_COLCLIP", sel.colclip);
|
||||
sm.AddMacro("PS_BLEND_A", sel.blend_a);
|
||||
|
|
|
@ -1483,6 +1483,8 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
|||
sm.AddMacro("PS_FST", sel.fst);
|
||||
sm.AddMacro("PS_WMS", sel.wms);
|
||||
sm.AddMacro("PS_WMT", sel.wmt);
|
||||
sm.AddMacro("PS_ADJS", sel.adjs);
|
||||
sm.AddMacro("PS_ADJT", sel.adjt);
|
||||
sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
|
||||
sm.AddMacro("PS_AEM", sel.aem);
|
||||
sm.AddMacro("PS_TFX", sel.tfx);
|
||||
|
@ -1505,7 +1507,6 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
|||
sm.AddMacro("PS_DFMT", sel.dfmt);
|
||||
sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
|
||||
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
|
||||
sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0);
|
||||
sm.AddMacro("PS_HDR", sel.hdr);
|
||||
sm.AddMacro("PS_COLCLIP", sel.colclip);
|
||||
sm.AddMacro("PS_BLEND_A", sel.blend_a);
|
||||
|
|
|
@ -1279,10 +1279,6 @@ void GSRendererHW::Draw()
|
|||
return;
|
||||
}
|
||||
|
||||
// Fix TEX0 size
|
||||
if (PRIM->TME && !IsMipMapActive())
|
||||
m_context->ComputeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t));
|
||||
|
||||
// skip alpha test if possible
|
||||
// Note: do it first so we know if frame/depth writes are masked
|
||||
|
||||
|
@ -1528,8 +1524,8 @@ void GSRendererHW::Draw()
|
|||
|
||||
TextureMinMaxResult tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear());
|
||||
|
||||
m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, tmm.coverage) :
|
||||
m_tc->LookupSource(TEX0, env.TEXA, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic ||
|
||||
m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage) :
|
||||
m_tc->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic ||
|
||||
GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr);
|
||||
|
||||
// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
|
||||
|
@ -1642,7 +1638,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++)
|
||||
{
|
||||
const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer);
|
||||
const GIFRegTEX0 MIP_TEX0(GetTex0Layer(layer));
|
||||
|
||||
m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM);
|
||||
|
||||
|
@ -3105,6 +3101,26 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
|
|||
}
|
||||
}
|
||||
|
||||
// Reports whether a region clamp/repeat setting is a no-op for a texture whose size exponent is
// `tsize`, i.e. whether it is configured to exactly the texture extent [0, (1 << tsize) - 1].
// Such cases do not need to be sampled in the shader, so trilinear etc. keeps working.
//   clamp     - wrap mode for this axis
//   clamp_min - CLAMP MINU/MINV for this axis
//   clamp_max - CLAMP MAXU/MAXV for this axis
// Always false for modes other than CLAMP_REGION_CLAMP / CLAMP_REGION_REPEAT.
__ri static constexpr bool IsRedundantClamp(u8 clamp, u32 clamp_min, u32 clamp_max, u32 tsize)
{
	const u32 last_texel = (1u << tsize) - 1u;

	// Region clamp: redundant when the range is the whole texture.
	const bool full_clamp = (clamp == CLAMP_REGION_CLAMP) && (clamp_min == 0) && (clamp_max == last_texel);

	// Region repeat: redundant when min equals the texture extent and max is zero.
	const bool full_repeat = (clamp == CLAMP_REGION_REPEAT) && (clamp_max == 0) && (clamp_min == last_texel);

	return full_clamp || full_repeat;
}
|
||||
|
||||
// Returns the wrap mode that should actually be used for an axis.
// When the region has already been extracted into the texture (`has_region`), the hardware
// sampler can do the repeat/clamp, so region modes have their low two bits flipped (clamp ^ 3).
// The flip looks odd because clamp/repeat is inverted for region vs non-region modes.
// NOTE(review): presumably this maps REGION_CLAMP -> CLAMP and REGION_REPEAT -> REPEAT under
// the usual 0..3 mode numbering; confirm against the CLAMP_* definitions.
__ri static constexpr u8 EffectiveClamp(u8 clamp, bool has_region)
{
	// Non-region modes, or no extracted region: leave the mode untouched.
	if (!has_region || clamp < CLAMP_REGION_CLAMP)
		return clamp;

	return clamp ^ 3;
}
|
||||
|
||||
void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
||||
{
|
||||
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
|
||||
|
@ -3112,9 +3128,16 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
|||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
|
||||
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
|
||||
|
||||
const u8 wms = m_context->CLAMP.WMS;
|
||||
const u8 wmt = m_context->CLAMP.WMT;
|
||||
static constexpr const char* clamp_modes[] = { "REPEAT", "CLAMP", "REGION_CLAMP", "REGION_REPEAT" };
|
||||
const bool redundant_wms = IsRedundantClamp(m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, tex->m_TEX0.TW);
|
||||
const bool redundant_wmt = IsRedundantClamp(m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, tex->m_TEX0.TH);
|
||||
const u8 wms = EffectiveClamp(m_context->CLAMP.WMS, tex->m_region.HasX());
|
||||
const u8 wmt = EffectiveClamp(m_context->CLAMP.WMT, tex->m_region.HasY());
|
||||
const bool complex_wms_wmt = !!((wms | wmt) & 2);
|
||||
// Log the requested wrap modes, whether each is redundant, the effective modes, and the raw
// clamp values. The labels follow the argument order: MINU, MINV, MAXU, MAXV.
GL_CACHE("WMS: %s [%s%s] WMT: %s [%s%s] Complex: %d MINU: %d MINV: %d MAXU: %d MAXV: %d",
	clamp_modes[m_context->CLAMP.WMS], redundant_wms ? "redundant," : "", clamp_modes[wms],
	clamp_modes[m_context->CLAMP.WMT], redundant_wmt ? "redundant," : "", clamp_modes[wmt],
	complex_wms_wmt, m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
|
||||
|
||||
const bool need_mipmap = IsMipMapDraw();
|
||||
const bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt || psm.depth;
|
||||
|
@ -3290,14 +3313,38 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
|||
const GSVector4 st_scale = WH.zwzw() / GSVector4(w, h).xyxy();
|
||||
m_conf.cb_ps.STScale = GSVector2(st_scale.x, st_scale.y);
|
||||
|
||||
if (tex->m_region.HasX())
|
||||
{
|
||||
m_conf.cb_ps.STRange.x = static_cast<float>(tex->m_region.GetMinX()) / static_cast<float>(miptw);
|
||||
m_conf.cb_ps.STRange.z = static_cast<float>(miptw) / static_cast<float>(tex->m_region.GetWidth());
|
||||
m_conf.ps.adjs = 1;
|
||||
}
|
||||
if (tex->m_region.HasY())
|
||||
{
|
||||
m_conf.cb_ps.STRange.y = static_cast<float>(tex->m_region.GetMinY()) / static_cast<float>(mipth);
|
||||
m_conf.cb_ps.STRange.w = static_cast<float>(mipth) / static_cast<float>(tex->m_region.GetHeight());
|
||||
m_conf.ps.adjt = 1;
|
||||
}
|
||||
|
||||
m_conf.ps.fst = !!PRIM->FST;
|
||||
|
||||
m_conf.cb_ps.WH = WH;
|
||||
m_conf.cb_ps.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
|
||||
if (complex_wms_wmt)
|
||||
{
|
||||
m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);;
|
||||
m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy();
|
||||
const GSVector4i clamp(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
|
||||
const GSVector4 region_repeat(GSVector4::cast(clamp));
|
||||
const GSVector4 region_clamp(GSVector4(clamp) / WH.xyxy());
|
||||
if (wms >= CLAMP_REGION_CLAMP)
|
||||
{
|
||||
m_conf.cb_ps.MinMax.x = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.x : region_repeat.x;
|
||||
m_conf.cb_ps.MinMax.z = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.z : region_repeat.z;
|
||||
}
|
||||
if (wmt >= CLAMP_REGION_CLAMP)
|
||||
{
|
||||
m_conf.cb_ps.MinMax.y = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.y : region_repeat.y;
|
||||
m_conf.cb_ps.MinMax.w = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.w : region_repeat.w;
|
||||
}
|
||||
}
|
||||
else if (trilinear_manual)
|
||||
{
|
||||
|
@ -3318,18 +3365,6 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
|||
m_conf.cb_ps.TCOffsetHack = GSVector2(tc_oh_ts.z, tc_oh_ts.w);
|
||||
m_conf.cb_vs.texture_scale = GSVector2(tc_oh_ts.x, tc_oh_ts.y);
|
||||
|
||||
// Must be done after all coordinates math
|
||||
if (m_context->HasFixedTEX0() && !PRIM->FST)
|
||||
{
|
||||
m_conf.ps.invalid_tex0 = 1;
|
||||
// Use invalid size to denormalize ST coordinate
|
||||
m_conf.cb_ps.WH.x = static_cast<float>(1 << m_context->stack.TEX0.TW);
|
||||
m_conf.cb_ps.WH.y = static_cast<float>(1 << m_context->stack.TEX0.TH);
|
||||
|
||||
// We can't handle m_target with invalid_tex0 atm due to upscaling
|
||||
ASSERT(!tex->m_target);
|
||||
}
|
||||
|
||||
// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
|
||||
m_conf.sampler.tau = (wms != CLAMP_CLAMP);
|
||||
m_conf.sampler.tav = (wmt != CLAMP_CLAMP);
|
||||
|
|
|
@ -119,7 +119,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm
|
|||
target->m_dirty.push_back(GSDirtyRect(rect, psm, bw));
|
||||
}
|
||||
|
||||
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette)
|
||||
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette)
|
||||
{
|
||||
if (GSConfig.UserHacks_DisableDepthSupport)
|
||||
{
|
||||
|
@ -177,7 +177,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
TEX0.TBP0, psm_str(psm));
|
||||
|
||||
// Create a shared texture source
|
||||
src = new Source(TEX0, TEXA, true);
|
||||
src = new Source(TEX0, TEXA);
|
||||
src->m_texture = dst->m_texture;
|
||||
src->m_shared_texture = true;
|
||||
src->m_target = true; // So renderer can check if a conversion is required
|
||||
|
@ -201,7 +201,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
else if (g_gs_renderer->m_game.title == CRC::SVCChaos || g_gs_renderer->m_game.title == CRC::KOF2002)
|
||||
{
|
||||
// SVCChaos black screen & KOF2002 blue screen on main menu, regardless of depth enabled or disabled.
|
||||
return LookupSource(TEX0, TEXA, r, nullptr);
|
||||
return LookupSource(TEX0, TEXA, CLAMP, r, nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -227,24 +227,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
|||
return src;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod)
|
||||
__ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
|
||||
const GSLocalMemory::psm_t& psm_s, const u32* clut, const GSTexture* gpu_clut, const GSVector2i& compare_lod,
|
||||
const GSTextureCache::SourceRegion& region, FastList<GSTextureCache::Source*>& map)
|
||||
{
|
||||
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP);
|
||||
|
||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
||||
|
||||
const u32* const clut = g_gs_renderer->m_mem.m_clut;
|
||||
GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
|
||||
|
||||
Source* src = NULL;
|
||||
|
||||
auto& m = m_src.m_map[TEX0.TBP0 >> 5];
|
||||
|
||||
const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0));
|
||||
for (auto i = m.begin(); i != m.end(); ++i)
|
||||
for (auto i = map.begin(); i != map.end(); ++i)
|
||||
{
|
||||
Source* s = *i;
|
||||
GSTextureCache::Source* s = *i;
|
||||
|
||||
if (((TEX0.U32[0] ^ s->m_TEX0.U32[0]) | ((TEX0.U32[1] ^ s->m_TEX0.U32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||
continue;
|
||||
|
@ -272,20 +261,92 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
continue;
|
||||
}
|
||||
|
||||
if (s->m_region.bits != 0 && s->m_region.bits != region.bits)
|
||||
continue;
|
||||
|
||||
// Same base mip texture, but we need to check that MXL was the same as well.
|
||||
// When mipmapping is off, this will be 0,0 vs 0,0.
|
||||
if (s->m_lod != compare_lod)
|
||||
continue;
|
||||
}
|
||||
|
||||
m.MoveFront(i.Index());
|
||||
|
||||
src = s;
|
||||
|
||||
break;
|
||||
map.MoveFront(i.Index());
|
||||
return s;
|
||||
}
|
||||
|
||||
Target* dst = NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod)
|
||||
{
|
||||
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH);
|
||||
|
||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
||||
|
||||
const u32* const clut = g_gs_renderer->m_mem.m_clut;
|
||||
GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
|
||||
|
||||
SourceRegion region = {};
|
||||
if (CLAMP.WMS == CLAMP_REGION_CLAMP && CLAMP.MAXU >= CLAMP.MINU)
|
||||
{
|
||||
// Another Lupin case here, it uses region clamp with UV (not ST), puts a clamp region further
|
||||
// into the texture, but a smaller TW/TH. Catch this by looking for a clamp range above TW.
|
||||
const u32 rw = CLAMP.MAXU - CLAMP.MAXU + 1;
|
||||
if (rw < (1u << TEX0.TW) || CLAMP.MAXU >= (1u << TEX0.TW))
|
||||
{
|
||||
region.SetX(CLAMP.MINU, CLAMP.MAXU + 1);
|
||||
GL_CACHE("TC: Region clamp optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth());
|
||||
}
|
||||
}
|
||||
else if (CLAMP.WMS == CLAMP_REGION_REPEAT && CLAMP.MINU != 0)
|
||||
{
|
||||
// Lupin the 3rd is really evil, it sets TW/TH to the texture size, but then uses region repeat
|
||||
// to offset the actual texture data to elsewhere. So, we'll just force any cases like this down
|
||||
// the region texture path.
|
||||
const u32 rw = ((CLAMP.MINU | CLAMP.MAXU) - CLAMP.MAXU) + 1;
|
||||
if (rw < (1u << TEX0.TW) || CLAMP.MAXU != 0)
|
||||
{
|
||||
region.SetX(CLAMP.MAXU, (CLAMP.MINU | CLAMP.MAXU) + 1);
|
||||
GL_CACHE("TC: Region repeat optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth());
|
||||
}
|
||||
}
|
||||
if (CLAMP.WMT == CLAMP_REGION_CLAMP && CLAMP.MAXV >= CLAMP.MINV)
|
||||
{
|
||||
const u32 rh = CLAMP.MAXV - CLAMP.MINV + 1;
|
||||
if (rh < (1u << TEX0.TH) || CLAMP.MAXV >= (1u << TEX0.TH))
|
||||
{
|
||||
region.SetY(CLAMP.MINV, CLAMP.MAXV + 1);
|
||||
GL_CACHE("TC: Region clamp optimization: %d height -> %d", 1 << TEX0.TW, region.GetHeight());
|
||||
}
|
||||
}
|
||||
else if (CLAMP.WMT == CLAMP_REGION_REPEAT && CLAMP.MINV != 0)
|
||||
{
|
||||
const u32 rh = ((CLAMP.MINV | CLAMP.MAXV) - CLAMP.MAXV) + 1;
|
||||
if (rh < (1u << TEX0.TH) || CLAMP.MAXV != 0)
|
||||
{
|
||||
region.SetY(CLAMP.MAXV, (CLAMP.MINV | CLAMP.MAXV) + 1);
|
||||
GL_CACHE("TC: Region repeat optimization: %d height -> %d", 1 << TEX0.TW, region.GetHeight());
|
||||
}
|
||||
}
|
||||
|
||||
const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0));
|
||||
Source* src = nullptr;
|
||||
|
||||
// Region textures might be placed in a different page, so check that first.
|
||||
const u32 lookup_page = TEX0.TBP0 >> 5;
|
||||
if (region.GetMinX() != 0 || region.GetMinY() != 0)
|
||||
{
|
||||
const GSOffset offset(psm_s.info, TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
const u32 region_page = offset.bn(region.GetMinX(), region.GetMinY()) >> 5;
|
||||
if (lookup_page != region_page)
|
||||
src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[region_page]);
|
||||
}
|
||||
if (!src)
|
||||
src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[lookup_page]);
|
||||
|
||||
|
||||
Target* dst = nullptr;
|
||||
bool half_right = false;
|
||||
int x_offset = 0;
|
||||
int y_offset = 0;
|
||||
|
@ -293,7 +354,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
#ifdef DISABLE_HW_TEXTURE_CACHE
|
||||
if (0)
|
||||
#else
|
||||
if (src == NULL)
|
||||
if (!src)
|
||||
#endif
|
||||
{
|
||||
const u32 bp = TEX0.TBP0;
|
||||
|
@ -466,11 +527,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
GIFRegTEX0 depth_TEX0;
|
||||
depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
|
||||
depth_TEX0.U32[1] = TEX0.U32[1];
|
||||
return LookupDepthSource(depth_TEX0, TEXA, r);
|
||||
return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
return LookupDepthSource(TEX0, TEXA, r, true);
|
||||
return LookupDepthSource(TEX0, TEXA, CLAMP, r, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -496,7 +557,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
|
||||
}
|
||||
#endif
|
||||
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut);
|
||||
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1893,13 +1954,13 @@ void GSTextureCache::IncAge()
|
|||
}
|
||||
|
||||
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
|
||||
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut)
|
||||
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region)
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
Source* src = new Source(TEX0, TEXA, false);
|
||||
Source* src = new Source(TEX0, TEXA);
|
||||
|
||||
const int tw = 1 << TEX0.TW;
|
||||
const int th = 1 << TEX0.TH;
|
||||
int tw = 1 << TEX0.TW;
|
||||
int th = 1 << TEX0.TH;
|
||||
//int tp = TEX0.TBW << 6;
|
||||
int tlevels = 1;
|
||||
if (lod)
|
||||
|
@ -2211,8 +2272,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0) || gpu_clut;
|
||||
const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
|
||||
|
||||
// adjust texture size to fit
|
||||
src->m_region = region;
|
||||
tw = region.HasX() ? region.GetWidth() : tw;
|
||||
th = region.HasY() ? region.GetHeight() : th;
|
||||
|
||||
// try the hash cache
|
||||
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr)
|
||||
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr)
|
||||
{
|
||||
src->m_texture = src->m_from_hash_cache->texture;
|
||||
if (gpu_clut)
|
||||
|
@ -2245,6 +2311,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
ASSERT(src->m_from_target == (dst ? &dst->m_texture : nullptr));
|
||||
ASSERT(src->m_texture->GetScale() == ((!dst || TEX0.PSM == PSM_PSMT8) ? GSVector2(1, 1) : dst->m_texture->GetScale()));
|
||||
|
||||
src->SetPages();
|
||||
|
||||
m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex);
|
||||
|
||||
return src;
|
||||
|
@ -2253,7 +2321,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
// This really needs a better home...
|
||||
extern bool FMVstarted;
|
||||
|
||||
GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod)
|
||||
GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region)
|
||||
{
|
||||
// don't bother hashing if we're not dumping or replacing.
|
||||
const bool dump = GSConfig.DumpReplaceableTextures && (!FMVstarted || GSConfig.DumpTexturesWithFMVActive) &&
|
||||
|
@ -2265,13 +2333,13 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
|
|||
|
||||
// need the hash either for replacing, dumping or caching.
|
||||
// if dumping/replacing is on, we compute the clut hash regardless, since replacements aren't indexed
|
||||
HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod)};
|
||||
HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod, region)};
|
||||
|
||||
// handle dumping first, this is mostly isolated.
|
||||
if (dump)
|
||||
{
|
||||
// dump base level
|
||||
GSTextureReplacements::DumpTexture(key, TEX0, TEXA, g_gs_renderer->m_mem, 0);
|
||||
GSTextureReplacements::DumpTexture(key, TEX0, TEXA, region, g_gs_renderer->m_mem, 0);
|
||||
|
||||
// and the mips
|
||||
if (lod && GSConfig.DumpReplaceableMipmaps)
|
||||
|
@ -2281,7 +2349,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
|
|||
for (int mip = 1; mip < nmips; mip++)
|
||||
{
|
||||
const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)};
|
||||
GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, g_gs_renderer->m_mem, mip);
|
||||
GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, region, g_gs_renderer->m_mem, mip);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2355,8 +2423,8 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
|
|||
return nullptr;
|
||||
|
||||
// expand/upload texture
|
||||
const int tw = 1 << TEX0.TW;
|
||||
const int th = 1 << TEX0.TH;
|
||||
const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
|
||||
const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
|
||||
const int tlevels = lod ? ((GSConfig.HWMipmap != HWMipmapLevel::Full) ? -1 : (lod->y - lod->x + 1)) : 1;
|
||||
GSTexture* tex = g_gs_device->CreateTexture(tw, th, tlevels, paltex ? GSTexture::Format::UNorm8 : GSTexture::Format::Color);
|
||||
if (!tex)
|
||||
|
@ -2366,7 +2434,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
|
|||
}
|
||||
|
||||
// upload base level
|
||||
PreloadTexture(TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, 0);
|
||||
PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0);
|
||||
|
||||
// upload mips if present
|
||||
if (lod)
|
||||
|
@ -2376,7 +2444,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
|
|||
for (int mip = 1; mip < nmips; mip++)
|
||||
{
|
||||
const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)};
|
||||
PreloadTexture(MIP_TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, mip);
|
||||
PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2649,12 +2717,13 @@ bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i
|
|||
|
||||
// GSTextureCache::Source
|
||||
|
||||
GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container)
|
||||
GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||
: m_palette_obj(nullptr)
|
||||
, m_palette(nullptr)
|
||||
, m_valid_rect(0, 0)
|
||||
, m_lod(0, 0)
|
||||
, m_target(false)
|
||||
, m_repeating(false)
|
||||
, m_p2t(NULL)
|
||||
, m_from_target(NULL)
|
||||
, m_from_target_TEX0(TEX0)
|
||||
|
@ -2662,32 +2731,8 @@ GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, b
|
|||
m_TEX0 = TEX0;
|
||||
m_TEXA = TEXA;
|
||||
|
||||
if (dummy_container)
|
||||
{
|
||||
// Dummy container only contain a m_texture that is a pointer to another source.
|
||||
|
||||
m_write.rect = NULL;
|
||||
m_write.count = 0;
|
||||
|
||||
m_repeating = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
|
||||
memset(m_layer_hash, 0, sizeof(m_layer_hash));
|
||||
|
||||
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
|
||||
m_write.count = 0;
|
||||
|
||||
m_repeating = m_TEX0.IsRepeating();
|
||||
|
||||
if (m_repeating && !CanPreload())
|
||||
{
|
||||
m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0);
|
||||
}
|
||||
|
||||
m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
}
|
||||
memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
|
||||
memset(m_layer_hash, 0, sizeof(m_layer_hash));
|
||||
}
|
||||
|
||||
GSTextureCache::Source::~Source()
|
||||
|
@ -2703,6 +2748,23 @@ GSTextureCache::Source::~Source()
|
|||
}
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::SetPages()
|
||||
{
|
||||
const int tw = 1 << m_TEX0.TW;
|
||||
const int th = 1 << m_TEX0.TH;
|
||||
|
||||
m_repeating = !m_from_hash_cache && m_TEX0.IsRepeating() && !m_region.IsFixedTEX0(tw, th);
|
||||
|
||||
if (m_repeating && !CanPreload())
|
||||
{
|
||||
// TODO: wrong for lupin/invalid tex0
|
||||
m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0);
|
||||
}
|
||||
|
||||
const GSVector4i rect(m_region.GetRect(tw, th));
|
||||
m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(rect);
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::Update(const GSVector4i& rect, int level)
|
||||
{
|
||||
Surface::UpdateAge();
|
||||
|
@ -2719,9 +2781,17 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int level)
|
|||
const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
|
||||
const int tw = 1 << m_TEX0.TW;
|
||||
const int th = 1 << m_TEX0.TH;
|
||||
const GSVector4i r = rect.ralign<Align_Outside>(bs);
|
||||
|
||||
if (r.eq(GSVector4i(0, 0, tw, th)))
|
||||
GSVector4i r(rect);
|
||||
const GSVector4i region_rect(m_region.GetRect(tw, th));
|
||||
|
||||
// Offset the pages we use by the clamp region.
|
||||
if (m_region.HasEither())
|
||||
r = (r + m_region.GetOffset(tw, th)).rintersect(region_rect);
|
||||
|
||||
r = r.ralign<Align_Outside>(bs);
|
||||
|
||||
if (region_rect.eq(m_region.HasEither() ? r.rintersect(region_rect) : r))
|
||||
m_complete_layers |= (1u << level);
|
||||
|
||||
const GSOffset& off = g_gs_renderer->m_context->offset.tex;
|
||||
|
@ -2818,6 +2888,9 @@ void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4
|
|||
|
||||
void GSTextureCache::Source::Write(const GSVector4i& r, int layer)
|
||||
{
|
||||
if (!m_write.rect)
|
||||
m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 32));
|
||||
|
||||
m_write.rect[m_write.count++] = r;
|
||||
|
||||
while (m_write.count >= 2)
|
||||
|
@ -2857,11 +2930,12 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
|
|||
// However the function is never called for these cases. This is just for information
|
||||
// should someone wish to use this function for these cases later.
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
||||
const SourceRegion region((layer == 0) ? m_region : m_region.AdjustForMipmap(layer));
|
||||
|
||||
const int tw = 1 << m_TEX0.TW;
|
||||
const int th = 1 << m_TEX0.TH;
|
||||
|
||||
const GSVector4i tr(0, 0, tw, th);
|
||||
// For the invalid tex0 case, the region might be larger than TEX0.TW/TH.
|
||||
const int tw = std::max(region.GetWidth(), 1u << m_TEX0.TW);
|
||||
const int th = std::max(region.GetHeight(), 1u << m_TEX0.TH);
|
||||
const GSVector4i tex_r(region.GetRect(tw, th));
|
||||
|
||||
int pitch = std::max(tw, psm.bs.x) * sizeof(u32);
|
||||
|
||||
|
@ -2877,35 +2951,33 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
|
|||
rtx = psm.rtxP;
|
||||
}
|
||||
|
||||
u8* buff = s_unswizzle_buffer;
|
||||
|
||||
for (u32 i = 0; i < count; i++)
|
||||
{
|
||||
const GSVector4i r = m_write.rect[i];
|
||||
const GSVector4i r(m_write.rect[i]);
|
||||
|
||||
if ((r > tr).mask() & 0xff00)
|
||||
{
|
||||
rtx(mem, off, r, buff, pitch, m_TEXA);
|
||||
|
||||
m_texture->Update(r.rintersect(tr), buff, pitch, layer);
|
||||
}
|
||||
else
|
||||
// if update rect lies to the left/above of the region rectangle, or extends past the texture bounds, we can't use a direct map
|
||||
if (((r > tex_r).mask() & 0xff00) == 0 && ((tex_r > r).mask() & 0x00ff) == 0)
|
||||
{
|
||||
GSTexture::GSMap m;
|
||||
|
||||
if (m_texture->Map(m, &r, layer))
|
||||
const GSVector4i map_r(r - tex_r.xyxy());
|
||||
if (m_texture->Map(m, &map_r, layer))
|
||||
{
|
||||
rtx(mem, off, r, m.bits, m.pitch, m_TEXA);
|
||||
|
||||
m_texture->Unmap();
|
||||
}
|
||||
else
|
||||
{
|
||||
rtx(mem, off, r, buff, pitch, m_TEXA);
|
||||
|
||||
m_texture->Update(r, buff, pitch, layer);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const GSVector4i rint(r.rintersect(tex_r));
|
||||
if (rint.width() == 0 || rint.height() == 0)
|
||||
continue;
|
||||
|
||||
rtx(mem, off, r, s_unswizzle_buffer, pitch, m_TEXA);
|
||||
|
||||
// need to offset if we're a region texture
|
||||
const u8* src = s_unswizzle_buffer + (pitch * static_cast<u32>(std::max(tex_r.top - r.top, 0))) +
|
||||
(static_cast<u32>(std::max(tex_r.left - r.left, 0)) << (m_palette ? 0 : 2));
|
||||
m_texture->Update(rint - tex_r.xyxy(), src, pitch, layer);
|
||||
}
|
||||
|
||||
if (count < m_write.count)
|
||||
|
@ -2920,7 +2992,7 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
|
|||
void GSTextureCache::Source::PreloadLevel(int level)
|
||||
{
|
||||
// m_TEX0 is adjusted for mips (messy, should be changed).
|
||||
const HashType hash = HashTexture(m_TEX0, m_TEXA);
|
||||
const HashType hash = HashTexture(m_TEX0, m_TEXA, m_region);
|
||||
|
||||
// Layer is complete again, regardless of whether the hash matches or not (and we reupload).
|
||||
const u8 layer_bit = static_cast<u8>(1) << level;
|
||||
|
@ -2934,7 +3006,7 @@ void GSTextureCache::Source::PreloadLevel(int level)
|
|||
m_layer_hash[level] = hash;
|
||||
|
||||
// And upload the texture.
|
||||
PreloadTexture(m_TEX0, m_TEXA, g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
|
||||
PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
|
||||
}
|
||||
|
||||
bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
|
||||
|
@ -3674,6 +3746,47 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GSTextureCache::SourceRegion::IsFixedTEX0(int tw, int th) const
|
||||
{
|
||||
return (GetMinX() >= static_cast<u32>(tw) || GetMinY() >= static_cast<u32>(th));
|
||||
}
|
||||
|
||||
// Builds the (left, top, right, bottom) rectangle covered by the region. An axis without
// a region falls back to the full 0..tw or 0..th span.
GSVector4i GSTextureCache::SourceRegion::GetRect(int tw, int th) const
{
	int left = 0;
	int right = tw;
	if (HasX())
	{
		left = static_cast<int>(GetMinX());
		right = static_cast<int>(GetMaxX());
	}

	int top = 0;
	int bottom = th;
	if (HasY())
	{
		top = static_cast<int>(GetMinY());
		bottom = static_cast<int>(GetMaxY());
	}

	return GSVector4i(left, top, right, bottom);
}
|
||||
|
||||
// Returns the per-axis offset (duplicated into both halves of the vector) to add to
// coordinates. An axis contributes its region minimum only when the region maximum
// exceeds the given texture size on that axis; otherwise its offset is zero.
GSVector4i GSTextureCache::SourceRegion::GetOffset(int tw, int th) const
{
	int shift_x = 0;
	if (GetMaxX() > static_cast<u32>(tw))
		shift_x = static_cast<int>(GetMinX());

	int shift_y = 0;
	if (GetMaxY() > static_cast<u32>(th))
		shift_y = static_cast<int>(GetMinY());

	return GSVector4i(shift_x, shift_y, shift_x, shift_y);
}
|
||||
|
||||
// Returns a copy of the region scaled down for mipmap `level`. The minimum is shifted
// right by `level`; the exclusive maximum is derived from the last covered texel
// (max - 1) so the scaled range still includes it. Axes without a region stay unset.
GSTextureCache::SourceRegion GSTextureCache::SourceRegion::AdjustForMipmap(u32 level) const
{
	SourceRegion scaled = {};

	if (HasX())
		scaled.SetX(GetMinX() >> level, ((GetMaxX() - 1) >> level) + 1);

	if (HasY())
		scaled.SetY(GetMinY() >> level, ((GetMaxY() - 1) >> level) + 1);

	return scaled;
}
|
||||
|
||||
// Advances TEX0->TBP0 by the block number of the region's top-left texel, computed with
// the swizzle layout of the TEX0 as it is on entry.
void GSTextureCache::SourceRegion::AdjustTEX0(GIFRegTEX0* TEX0) const
{
	const auto& swizzle = GSLocalMemory::m_psm[TEX0->PSM].info;
	const GSOffset layout(swizzle, TEX0->TBP0, TEX0->TBW, TEX0->PSM);

	TEX0->TBP0 += layout.bn(GetMinX(), GetMinY());
}
|
||||
|
||||
using BlockHashState = XXH3_state_t;
|
||||
|
||||
__fi static void BlockHashReset(BlockHashState& st)
|
||||
|
@ -3696,16 +3809,16 @@ __fi static GSTextureCache::HashType FinishBlockHash(BlockHashState& st)
|
|||
return GSXXH3_64bits_digest(&st);
|
||||
}
|
||||
|
||||
static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, BlockHashState& hash_st, u8* temp)
|
||||
static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, BlockHashState& hash_st, u8* temp)
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
const GSVector2i& bs = psm.bs;
|
||||
const int tw = 1 << TEX0.TW;
|
||||
const int th = 1 << TEX0.TH;
|
||||
const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
|
||||
const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
|
||||
|
||||
// From GSLocalMemory foreachBlock(), used for reading textures.
|
||||
// We want to hash the exact same blocks here.
|
||||
const GSVector4i rect(0, 0, tw, th);
|
||||
const GSVector4i rect(region.GetRect(tw, th));
|
||||
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
|
||||
GSLocalMemory& mem = g_gs_renderer->m_mem;
|
||||
const GSOffset off = mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
@ -3717,7 +3830,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
|
|||
// the texture data with other textures/framebuffers/etc (which is common).
|
||||
// Even though you might think this would be slower than just hashing for the hash
|
||||
// cache, it actually ends up faster (unswizzling is faster than hashing).
|
||||
if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu)
|
||||
if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu || region.GetMaxX() > 0 || region.GetMinY() > 0)
|
||||
{
|
||||
// Expand texture indices. Align to 32 bytes for AVX2.
|
||||
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(block_rect.z), 32);
|
||||
|
@ -3728,7 +3841,8 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
|
|||
rtx(mem, off, block_rect, temp, pitch, TEXA);
|
||||
|
||||
// Hash the expanded texture.
|
||||
u8* ptr = temp;
|
||||
u8* ptr = temp + (pitch * static_cast<u32>(rect.top - block_rect.top)) +
|
||||
static_cast<u32>(rect.left - block_rect.left);
|
||||
if (pitch == row_size)
|
||||
{
|
||||
BlockHashAccumulate(hash_st, ptr, pitch * static_cast<u32>(th));
|
||||
|
@ -3741,8 +3855,6 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
|
|||
}
|
||||
else
|
||||
{
|
||||
BlockHashReset(hash_st);
|
||||
|
||||
GSOffset::BNHelper bn = off.bnMulti(block_rect.left, block_rect.top);
|
||||
const int right = block_rect.right >> off.blockShiftX();
|
||||
const int bottom = block_rect.bottom >> off.blockShiftY();
|
||||
|
@ -3758,27 +3870,27 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
|
|||
}
|
||||
}
|
||||
|
||||
GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||
GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region)
|
||||
{
|
||||
BlockHashState hash_st;
|
||||
BlockHashReset(hash_st);
|
||||
HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer);
|
||||
HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer);
|
||||
return FinishBlockHash(hash_st);
|
||||
}
|
||||
|
||||
void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
|
||||
void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
|
||||
{
|
||||
// m_TEX0 is adjusted for mips (messy, should be changed).
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
const GSVector2i& bs = psm.bs;
|
||||
const int tw = 1 << TEX0.TW;
|
||||
const int th = 1 << TEX0.TH;
|
||||
const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
|
||||
const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
|
||||
|
||||
// Expand texture/apply palette.
|
||||
const GSVector4i rect(0, 0, tw, th);
|
||||
const GSVector4i rect(region.GetRect(tw, th));
|
||||
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
|
||||
const GSOffset off(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM));
|
||||
const int read_width = std::max(tw, psm.bs.x);
|
||||
const int read_width = block_rect.width();
|
||||
u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
|
||||
GSLocalMemory::readTexture rtx = psm.rtx;
|
||||
if (paltex)
|
||||
|
@ -3788,8 +3900,9 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
}
|
||||
|
||||
// If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer.
|
||||
const GSVector4i unoffset_rect(0, 0, tw, th);
|
||||
GSTexture::GSMap map;
|
||||
if (rect.eq(block_rect) && tex->Map(map, &rect, level))
|
||||
if (rect.eq(block_rect) && tex->Map(map, &unoffset_rect, level))
|
||||
{
|
||||
rtx(mem, off, block_rect, map.bits, map.pitch, TEXA);
|
||||
tex->Unmap();
|
||||
|
@ -3801,7 +3914,10 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
|
||||
u8* buff = s_unswizzle_buffer;
|
||||
rtx(mem, off, block_rect, buff, pitch, TEXA);
|
||||
tex->Update(rect, buff, pitch, level);
|
||||
|
||||
const u8* ptr = buff + (pitch * static_cast<u32>(rect.top - block_rect.top)) +
|
||||
(static_cast<u32>(rect.left - block_rect.left) << (paltex ? 0 : 2));
|
||||
tex->Update(unoffset_rect, ptr, pitch, level);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3813,7 +3929,7 @@ GSTextureCache::HashCacheKey::HashCacheKey()
|
|||
TEXA.U64 = 0;
|
||||
}
|
||||
|
||||
GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod)
|
||||
GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region)
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
|
||||
|
@ -3821,12 +3937,13 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE
|
|||
ret.TEX0.U64 = TEX0.U64 & 0x00000007FFF00000ULL;
|
||||
ret.TEXA.U64 = (psm.pal == 0 && psm.fmt > 0) ? (TEXA.U64 & 0x000000FF000080FFULL) : 0;
|
||||
ret.CLUTHash = clut ? GSTextureCache::PaletteKeyHash{}({clut, psm.pal}) : 0;
|
||||
ret.region = region;
|
||||
|
||||
BlockHashState hash_st;
|
||||
BlockHashReset(hash_st);
|
||||
|
||||
// base level is always hashed
|
||||
HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer);
|
||||
HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer);
|
||||
|
||||
if (lod)
|
||||
{
|
||||
|
@ -3836,7 +3953,7 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE
|
|||
for (int i = 1; i < nmips; i++)
|
||||
{
|
||||
const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + i)};
|
||||
HashTextureLevel(MIP_TEX0, TEXA, hash_st, s_unswizzle_buffer);
|
||||
HashTextureLevel(MIP_TEX0, TEXA, region.AdjustForMipmap(i), hash_st, s_unswizzle_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3860,6 +3977,6 @@ void GSTextureCache::HashCacheKey::RemoveCLUTHash()
|
|||
u64 GSTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& key) const
|
||||
{
|
||||
std::size_t h = 0;
|
||||
HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64);
|
||||
HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64, key.region.bits);
|
||||
return h;
|
||||
}
|
||||
|
|
|
@ -44,6 +44,42 @@ public:
|
|||
return valid && overlap;
|
||||
}
|
||||
|
||||
struct SourceRegion
|
||||
{
|
||||
u64 bits;
|
||||
|
||||
bool HasX() const { return static_cast<u32>(bits) != 0; }
|
||||
bool HasY() const { return static_cast<u32>(bits >> 32) != 0; }
|
||||
bool HasEither() const { return (bits != 0); }
|
||||
|
||||
void SetX(u32 min, u32 max) { bits |= (min | (max << 16)); }
|
||||
void SetY(u32 min, u32 max) { bits |= ((static_cast<u64>(min) << 32) | (static_cast<u64>(max) << 48)); }
|
||||
|
||||
u32 GetMinX() const { return static_cast<u32>(bits) & 0xFFFFu; }
|
||||
u32 GetMaxX() const { return static_cast<u32>(bits >> 16) & 0xFFFFu; }
|
||||
u32 GetMinY() const { return static_cast<u32>(bits >> 32) & 0xFFFFu; }
|
||||
u32 GetMaxY() const { return static_cast<u32>(bits >> 48); }
|
||||
|
||||
u32 GetWidth() const { return (GetMaxX() - GetMinX()); }
|
||||
u32 GetHeight() const { return (GetMaxY() - GetMinY()); }
|
||||
|
||||
/// Returns true if the area of the region exceeds the TW/TH size (i.e. "fixed tex0").
|
||||
bool IsFixedTEX0(int tw, int th) const;
|
||||
|
||||
/// Returns the rectangle relative to the texture base pointer that the region occupies.
|
||||
GSVector4i GetRect(int tw, int th) const;
|
||||
|
||||
/// When TW/TH is less than the extents covered by the region ("fixed tex0"), returns the offset
|
||||
/// which should be applied to any coordinates to relocate them to the actual region.
|
||||
GSVector4i GetOffset(int tw, int th) const;
|
||||
|
||||
/// Reduces the range of texels relative to the specified mipmap level.
|
||||
SourceRegion AdjustForMipmap(u32 level) const;
|
||||
|
||||
/// Adjusts the texture base pointer and block width relative to the region.
|
||||
void AdjustTEX0(GIFRegTEX0* TEX0) const;
|
||||
};
|
||||
|
||||
using HashType = u64;
|
||||
|
||||
struct HashCacheKey
|
||||
|
@ -51,10 +87,11 @@ public:
|
|||
HashType TEX0Hash, CLUTHash;
|
||||
GIFRegTEX0 TEX0;
|
||||
GIFRegTEXA TEXA;
|
||||
SourceRegion region;
|
||||
|
||||
HashCacheKey();
|
||||
|
||||
static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod);
|
||||
static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region);
|
||||
|
||||
HashCacheKey WithRemovedCLUTHash() const;
|
||||
void RemoveCLUTHash();
|
||||
|
@ -148,7 +185,7 @@ public:
|
|||
{
|
||||
GSVector4i* rect;
|
||||
u32 count;
|
||||
} m_write;
|
||||
} m_write = {};
|
||||
|
||||
void PreloadLevel(int level);
|
||||
|
||||
|
@ -161,6 +198,7 @@ public:
|
|||
GSTexture* m_palette;
|
||||
GSVector4i m_valid_rect;
|
||||
GSVector2i m_lod;
|
||||
SourceRegion m_region = {};
|
||||
u8 m_valid_hashes = 0;
|
||||
u8 m_complete_layers = 0;
|
||||
bool m_target;
|
||||
|
@ -178,11 +216,13 @@ public:
|
|||
GSOffset::PageLooper m_pages;
|
||||
|
||||
public:
|
||||
Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container = false);
|
||||
Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||
virtual ~Source();
|
||||
|
||||
__fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); }
|
||||
|
||||
void SetPages();
|
||||
|
||||
void Update(const GSVector4i& rect, int layer = 0);
|
||||
void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0);
|
||||
|
||||
|
@ -322,7 +362,7 @@ protected:
|
|||
std::unique_ptr<GSDownloadTexture> m_uint16_download_texture;
|
||||
std::unique_ptr<GSDownloadTexture> m_uint32_download_texture;
|
||||
|
||||
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut);
|
||||
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
|
||||
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
|
||||
|
||||
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
|
||||
|
@ -332,10 +372,10 @@ protected:
|
|||
/// Resizes the download texture if needed.
|
||||
bool PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex);
|
||||
|
||||
HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod);
|
||||
HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region);
|
||||
|
||||
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
|
||||
static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
|
||||
static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region);
|
||||
|
||||
// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
|
||||
// TODO: virtual void Write(Target* t, const GSVector4i& r) = 0;
|
||||
|
@ -358,8 +398,8 @@ public:
|
|||
|
||||
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
|
||||
|
||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod);
|
||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
|
||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod);
|
||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette = false);
|
||||
|
||||
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData);
|
||||
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
|
||||
|
|
|
@ -42,6 +42,8 @@
|
|||
// this is a #define instead of a variable to avoid warnings from non-literal format strings
|
||||
#define TEXTURE_FILENAME_FORMAT_STRING "%" PRIx64 "-%08x"
|
||||
#define TEXTURE_FILENAME_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-%08x"
|
||||
#define TEXTURE_FILENAME_REGION_FORMAT_STRING "%" PRIx64 "-r%" PRIx64 "-" "-%08x"
|
||||
#define TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-r%" PRIx64 "-%08x"
|
||||
#define TEXTURE_REPLACEMENT_SUBDIRECTORY_NAME "replacements"
|
||||
#define TEXTURE_DUMP_SUBDIRECTORY_NAME "dumps"
|
||||
|
||||
|
@ -51,6 +53,7 @@ namespace
|
|||
{
|
||||
u64 TEX0Hash;
|
||||
u64 CLUTHash;
|
||||
GSTextureCache::SourceRegion region;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -68,9 +71,10 @@ namespace
|
|||
};
|
||||
u32 miplevel;
|
||||
|
||||
__fi u32 Width() const { return (1u << TEX0_TW); }
|
||||
__fi u32 Height() const { return (1u << TEX0_TH); }
|
||||
// Texture width in texels: taken from the region when it constrains X, otherwise 1 << TEX0_TW.
__fi u32 Width() const
{
	if (region.HasX())
		return region.GetWidth();

	return (1u << TEX0_TW);
}
|
||||
// Texture height in texels: taken from the region when it constrains Y, otherwise 1 << TEX0_TH.
// The region branch must use the region's height (not its width) so non-square regions are sized correctly.
__fi u32 Height() const { return (region.HasY() ? region.GetHeight() : (1u << TEX0_TH)); }
|
||||
__fi bool HasPalette() const { return (GSLocalMemory::m_psm[TEX0_PSM].pal > 0); }
|
||||
__fi bool HasRegion() const { return region.HasEither(); }
|
||||
|
||||
__fi GSVector2 ReplacementScale(const GSTextureReplacements::ReplacementTexture& rtex) const
|
||||
{
|
||||
|
@ -79,14 +83,27 @@ namespace
|
|||
|
||||
__fi GSVector2 ReplacementScale(u32 rwidth, u32 rheight) const
|
||||
{
|
||||
return GSVector2(static_cast<float>(rwidth) / static_cast<float>(Width()), static_cast<float>(rheight) / static_cast<float>(Height()));
|
||||
return GSVector2(static_cast<float>(rwidth) / static_cast<float>(Width()),
|
||||
static_cast<float>(rheight) / static_cast<float>(Height()));
|
||||
}
|
||||
|
||||
__fi bool operator==(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) == std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
|
||||
__fi bool operator!=(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) != std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
|
||||
__fi bool operator<(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) < std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
|
||||
// Equality over (TEX0Hash, CLUTHash, region.bits, bits); miplevel is not part of the comparison.
// Both sides must contribute their own region bits, otherwise names differing only by region compare equal.
__fi bool operator==(const TextureName& rhs) const
{
	return std::tie(TEX0Hash, CLUTHash, region.bits, bits) ==
		   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
}
|
||||
// Inequality over (TEX0Hash, CLUTHash, region.bits, bits); miplevel is not part of the comparison.
// The right-hand tuple uses rhs.region.bits so that a differing region makes the names unequal.
__fi bool operator!=(const TextureName& rhs) const
{
	return std::tie(TEX0Hash, CLUTHash, region.bits, bits) !=
		   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
}
|
||||
// Lexicographic ordering over (TEX0Hash, CLUTHash, region.bits, bits); miplevel is not considered.
// The right-hand tuple uses rhs.region.bits so the region actually participates in the ordering.
__fi bool operator<(const TextureName& rhs) const
{
	return std::tie(TEX0Hash, CLUTHash, region.bits, bits) <
		   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
}
|
||||
};
|
||||
static_assert(sizeof(TextureName) == 24, "ReplacementTextureName is expected size");
|
||||
static_assert(sizeof(TextureName) == 32, "ReplacementTextureName is expected size");
|
||||
} // namespace
|
||||
|
||||
namespace std
|
||||
|
@ -97,7 +114,7 @@ namespace std
|
|||
std::size_t operator()(const TextureName& val) const
|
||||
{
|
||||
std::size_t h = 0;
|
||||
HashCombine(h, val.TEX0Hash, val.CLUTHash, val.bits, val.miplevel);
|
||||
HashCombine(h, val.TEX0Hash, val.CLUTHash, val.region.bits, val.bits, val.miplevel);
|
||||
return h;
|
||||
}
|
||||
};
|
||||
|
@ -169,6 +186,7 @@ TextureName GSTextureReplacements::CreateTextureName(const GSTextureCache::HashC
|
|||
name.TEX0Hash = hash.TEX0Hash;
|
||||
name.CLUTHash = name.HasPalette() ? hash.CLUTHash : 0;
|
||||
name.miplevel = miplevel;
|
||||
name.region = hash.region;
|
||||
return name;
|
||||
}
|
||||
|
||||
|
@ -184,6 +202,7 @@ GSTextureCache::HashCacheKey GSTextureReplacements::HashCacheKeyFromTextureName(
|
|||
key.TEXA.TA1 = tn.TEXA_TA1;
|
||||
key.TEX0Hash = tn.TEX0Hash;
|
||||
key.CLUTHash = tn.HasPalette() ? tn.CLUTHash : 0;
|
||||
key.region = tn.region;
|
||||
return key;
|
||||
}
|
||||
|
||||
|
@ -192,15 +211,38 @@ std::optional<TextureName> GSTextureReplacements::ParseReplacementName(const std
|
|||
TextureName ret;
|
||||
ret.miplevel = 0;
|
||||
|
||||
// TODO(Stenzek): Make this better.
|
||||
char extension_dot;
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits, &extension_dot) != 4 || extension_dot != '.')
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash,
|
||||
&ret.region.bits, &ret.bits, &extension_dot) == 5 &&
|
||||
extension_dot == '.')
|
||||
{
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.bits, &extension_dot) != 3 || extension_dot != '.')
|
||||
return std::nullopt;
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.region.bits,
|
||||
&ret.bits, &extension_dot) == 4 &&
|
||||
extension_dot == '.')
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret.region.bits = 0;
|
||||
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits,
|
||||
&extension_dot) == 4 &&
|
||||
extension_dot == '.')
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.bits, &extension_dot) ==
|
||||
3 &&
|
||||
extension_dot == '.')
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::string GSTextureReplacements::GetGameTextureDirectory()
|
||||
|
@ -229,23 +271,45 @@ std::string GSTextureReplacements::GetDumpFilename(const TextureName& name, u32
|
|||
|
||||
const std::string game_subdir(Path::Combine(game_dir, TEXTURE_DUMP_SUBDIRECTORY_NAME));
|
||||
|
||||
if (name.HasPalette())
|
||||
std::string filename;
|
||||
if (name.HasRegion())
|
||||
{
|
||||
const std::string filename(
|
||||
(level > 0) ?
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.CLUTHash, name.bits, level) :
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png", name.TEX0Hash, name.CLUTHash, name.bits));
|
||||
ret = Path::Combine(game_subdir, filename);
|
||||
if (name.HasPalette())
|
||||
{
|
||||
filename = (level > 0) ?
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "-mip%u.png",
|
||||
name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits, level) :
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING ".png",
|
||||
name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Region-only (no palette) names must embed the region bits: without them the dump would share its
// filename with the non-region texture, and the region branch of ParseReplacementName could not read it back.
filename = (level > 0) ? StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_FORMAT_STRING "-mip%u.png",
							 name.TEX0Hash, name.region.bits, name.bits, level) :
						 StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_FORMAT_STRING ".png",
							 name.TEX0Hash, name.region.bits, name.bits);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const std::string filename(
|
||||
(level > 0) ?
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) :
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits));
|
||||
ret = Path::Combine(game_subdir, filename);
|
||||
if (name.HasPalette())
|
||||
{
|
||||
filename = (level > 0) ? StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png",
|
||||
name.TEX0Hash, name.CLUTHash, name.bits, level) :
|
||||
StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png",
|
||||
name.TEX0Hash, name.CLUTHash, name.bits);
|
||||
}
|
||||
else
|
||||
{
|
||||
filename = (level > 0) ? StringUtil::StdStringFromFormat(
|
||||
TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) :
|
||||
StringUtil::StdStringFromFormat(
|
||||
TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits);
|
||||
}
|
||||
}
|
||||
|
||||
ret = Path::Combine(game_subdir, filename);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -569,7 +633,8 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures()
|
|||
s_async_loaded_textures.clear();
|
||||
}
|
||||
|
||||
void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level)
|
||||
void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0,
|
||||
const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level)
|
||||
{
|
||||
// check if it's been dumped or replaced already
|
||||
const TextureName name(CreateTextureName(hash, level));
|
||||
|
@ -589,12 +654,12 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash
|
|||
// compute width/height
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
const GSVector2i& bs = psm.bs;
|
||||
const int tw = 1 << TEX0.TW;
|
||||
const int th = 1 << TEX0.TH;
|
||||
const GSVector4i rect(0, 0, tw, th);
|
||||
const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
|
||||
const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
|
||||
const GSVector4i rect(region.GetRect(tw, th));
|
||||
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
|
||||
const int read_width = std::max(tw, psm.bs.x);
|
||||
const int read_height = std::max(th, psm.bs.y);
|
||||
const int read_width = block_rect.width();
|
||||
const int read_height = block_rect.height();
|
||||
const u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
|
||||
|
||||
// use per-texture buffer so we can compress the texture asynchronously and not block the GS thread
|
||||
|
@ -603,8 +668,9 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash
|
|||
psm.rtx(mem, mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM), block_rect, buffer.GetPtr(), pitch, TEXA);
|
||||
|
||||
// okay, now we can actually dump it
|
||||
QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer)]() {
|
||||
if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr(), pitch))
|
||||
const u32 buffer_offset = ((rect.top - block_rect.top) * pitch) + ((rect.left - block_rect.left) * sizeof(u32));
|
||||
QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer), buffer_offset]() {
|
||||
if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr() + buffer_offset, pitch))
|
||||
Console.Error("Failed to dump texture to '%s'.", filename.c_str());
|
||||
});
|
||||
}
|
||||
|
|
|
@ -52,7 +52,8 @@ namespace GSTextureReplacements
|
|||
GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, const GSVector2& scale, bool mipmap);
|
||||
void ProcessAsyncLoadedTextures();
|
||||
|
||||
void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level);
|
||||
void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
|
||||
GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level);
|
||||
void ClearDumpedTextureList();
|
||||
|
||||
/// Loader will take a filename and interpret the format (e.g. DDS, PNG, etc).
|
||||
|
|
|
@ -1374,6 +1374,8 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
|
|||
setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC);
|
||||
setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS);
|
||||
setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT);
|
||||
setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS);
|
||||
setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT);
|
||||
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
|
||||
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
|
||||
|
@ -1403,7 +1405,6 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
|
|||
setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD);
|
||||
setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD);
|
||||
setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER);
|
||||
setFnConstantB(m_fn_constants, pssel.invalid_tex0, GSMTLConstantIndex_PS_INVALID_TEX0);
|
||||
setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK);
|
||||
auto newps = LoadShader(@"ps_main");
|
||||
ps = newps;
|
||||
|
@ -1594,10 +1595,10 @@ static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, WH) == of
|
|||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.x) == offsetof(GSMTLMainPSUniform, ta));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.z) == offsetof(GSMTLMainPSUniform, max_depth));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.w) == offsetof(GSMTLMainPSUniform, alpha_fix));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MskFix) == offsetof(GSMTLMainPSUniform, uv_msk_fix));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FbMask) == offsetof(GSMTLMainPSUniform, fbmask));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, HalfTexel) == offsetof(GSMTLMainPSUniform, half_texel));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MinMax) == offsetof(GSMTLMainPSUniform, uv_min_max));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STRange) == offsetof(GSMTLMainPSUniform, st_range));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle) == offsetof(GSMTLMainPSUniform, channel_shuffle));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack) == offsetof(GSMTLMainPSUniform, tc_offset));
|
||||
static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale) == offsetof(GSMTLMainPSUniform, st_scale));
|
||||
|
|
|
@ -108,11 +108,15 @@ struct GSMTLMainPSUniform
|
|||
vector_float2 ta;
|
||||
float max_depth;
|
||||
float alpha_fix;
|
||||
vector_uint4 uv_msk_fix;
|
||||
vector_uint4 fbmask;
|
||||
|
||||
vector_float4 half_texel;
|
||||
vector_float4 uv_min_max;
|
||||
union
|
||||
{
|
||||
vector_float4 uv_min_max;
|
||||
vector_uint4 uv_msk_fix;
|
||||
};
|
||||
vector_float4 st_range;
|
||||
struct
|
||||
{
|
||||
unsigned int blue_mask;
|
||||
|
@ -166,6 +170,8 @@ enum GSMTLFnConstants
|
|||
GSMTLConstantIndex_PS_TCC,
|
||||
GSMTLConstantIndex_PS_WMS,
|
||||
GSMTLConstantIndex_PS_WMT,
|
||||
GSMTLConstantIndex_PS_ADJS,
|
||||
GSMTLConstantIndex_PS_ADJT,
|
||||
GSMTLConstantIndex_PS_LTF,
|
||||
GSMTLConstantIndex_PS_SHUFFLE,
|
||||
GSMTLConstantIndex_PS_READ_BA,
|
||||
|
@ -194,6 +200,5 @@ enum GSMTLFnConstants
|
|||
GSMTLConstantIndex_PS_AUTOMATIC_LOD,
|
||||
GSMTLConstantIndex_PS_MANUAL_LOD,
|
||||
GSMTLConstantIndex_PS_POINT_SAMPLER,
|
||||
GSMTLConstantIndex_PS_INVALID_TEX0,
|
||||
GSMTLConstantIndex_PS_SCANMSK,
|
||||
};
|
||||
|
|
|
@ -37,6 +37,8 @@ constant uint PS_TFX [[function_constant(GSMTLConstantIndex_PS_TF
|
|||
constant bool PS_TCC [[function_constant(GSMTLConstantIndex_PS_TCC)]];
|
||||
constant uint PS_WMS [[function_constant(GSMTLConstantIndex_PS_WMS)]];
|
||||
constant uint PS_WMT [[function_constant(GSMTLConstantIndex_PS_WMT)]];
|
||||
constant bool PS_ADJS [[function_constant(GSMTLConstantIndex_PS_ADJS)]];
|
||||
constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_ADJT)]];
|
||||
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
|
||||
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
|
||||
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
|
||||
|
@ -65,7 +67,6 @@ constant bool PS_TEX_IS_FB [[function_constant(GSMTLConstantIndex_PS_TE
|
|||
constant bool PS_AUTOMATIC_LOD [[function_constant(GSMTLConstantIndex_PS_AUTOMATIC_LOD)]];
|
||||
constant bool PS_MANUAL_LOD [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]];
|
||||
constant bool PS_POINT_SAMPLER [[function_constant(GSMTLConstantIndex_PS_POINT_SAMPLER)]];
|
||||
constant bool PS_INVALID_TEX0 [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]];
|
||||
constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]];
|
||||
|
||||
constant GSMTLExpandType VS_EXPAND_TYPE = static_cast<GSMTLExpandType>(VS_EXPAND_TYPE_RAW);
|
||||
|
@ -321,7 +322,21 @@ struct PSMain
|
|||
// As of 2018 this issue is still present.
|
||||
uv = (trunc(uv * cb.wh.zw) + 0.5) / cb.wh.zw;
|
||||
}
|
||||
uv *= cb.st_scale;
|
||||
if (!PS_ADJS && !PS_ADJT)
|
||||
{
|
||||
uv *= cb.st_scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (PS_ADJS)
|
||||
uv.x = (uv.x - cb.st_range.x) * cb.st_range.z;
|
||||
else
|
||||
uv.x = uv.x * cb.st_scale.x;
|
||||
if (PS_ADJT)
|
||||
uv.y = (uv.y - cb.st_range.y) * cb.st_range.w;
|
||||
else
|
||||
uv.y = uv.y * cb.st_scale.y;
|
||||
}
|
||||
|
||||
if (PS_AUTOMATIC_LOD)
|
||||
{
|
||||
|
@ -360,7 +375,7 @@ struct PSMain
|
|||
float4 clamp_wrap_uv(float4 uv)
|
||||
{
|
||||
float4 uv_out = uv;
|
||||
float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy;
|
||||
float4 tex_size = cb.wh.xyxy;
|
||||
|
||||
if (PS_WMS == PS_WMT)
|
||||
{
|
||||
|
@ -724,12 +739,7 @@ struct PSMain
|
|||
float4 ps_color()
|
||||
{
|
||||
float2 st, st_int;
|
||||
if (!FST && PS_INVALID_TEX0)
|
||||
{
|
||||
st = (in.t.xy * cb.wh.xy) / (in.t.w * cb.wh.zw);
|
||||
st_int = (in.ti.zw * cb.wh.xy) / (in.t.w * cb.wh.zw);
|
||||
}
|
||||
else if (!FST)
|
||||
if (!FST)
|
||||
{
|
||||
st = in.t.xy / in.t.w;
|
||||
st_int = in.ti.zw / in.t.w;
|
||||
|
|
|
@ -1029,6 +1029,8 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||
std::string macro = fmt::format("#define PS_FST {}\n", sel.fst)
|
||||
+ fmt::format("#define PS_WMS {}\n", sel.wms)
|
||||
+ fmt::format("#define PS_WMT {}\n", sel.wmt)
|
||||
+ fmt::format("#define PS_ADJS {}\n", sel.adjs)
|
||||
+ fmt::format("#define PS_ADJT {}\n", sel.adjt)
|
||||
+ fmt::format("#define PS_AEM_FMT {}\n", sel.aem_fmt)
|
||||
+ fmt::format("#define PS_PAL_FMT {}\n", sel.pal_fmt)
|
||||
+ fmt::format("#define PS_DFMT {}\n", sel.dfmt)
|
||||
|
@ -1037,7 +1039,6 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||
+ fmt::format("#define PS_URBAN_CHAOS_HLE {}\n", sel.urban_chaos_hle)
|
||||
+ fmt::format("#define PS_TALES_OF_ABYSS_HLE {}\n", sel.tales_of_abyss_hle)
|
||||
+ fmt::format("#define PS_TEX_IS_FB {}\n", sel.tex_is_fb)
|
||||
+ fmt::format("#define PS_INVALID_TEX0 {}\n", sel.invalid_tex0)
|
||||
+ fmt::format("#define PS_AEM {}\n", sel.aem)
|
||||
+ fmt::format("#define PS_TFX {}\n", sel.tfx)
|
||||
+ fmt::format("#define PS_TCC {}\n", sel.tcc)
|
||||
|
|
|
@ -1948,6 +1948,8 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
|||
AddMacro(ss, "PS_FST", sel.fst);
|
||||
AddMacro(ss, "PS_WMS", sel.wms);
|
||||
AddMacro(ss, "PS_WMT", sel.wmt);
|
||||
AddMacro(ss, "PS_ADJS", sel.adjs);
|
||||
AddMacro(ss, "PS_ADJT", sel.adjt);
|
||||
AddMacro(ss, "PS_AEM_FMT", sel.aem_fmt);
|
||||
AddMacro(ss, "PS_PAL_FMT", sel.pal_fmt);
|
||||
AddMacro(ss, "PS_DFMT", sel.dfmt);
|
||||
|
@ -1955,7 +1957,6 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
|||
AddMacro(ss, "PS_CHANNEL_FETCH", sel.channel);
|
||||
AddMacro(ss, "PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);
|
||||
AddMacro(ss, "PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
|
||||
AddMacro(ss, "PS_INVALID_TEX0", sel.invalid_tex0);
|
||||
AddMacro(ss, "PS_AEM", sel.aem);
|
||||
AddMacro(ss, "PS_TFX", sel.tfx);
|
||||
AddMacro(ss, "PS_TCC", sel.tcc);
|
||||
|
|
|
@ -15,4 +15,4 @@
|
|||
|
||||
/// Version number for GS and other shaders. Increment whenever any of the contents of the
|
||||
/// shaders change, to invalidate the cache.
|
||||
static constexpr u32 SHADER_CACHE_VERSION = 11;
|
||||
static constexpr u32 SHADER_CACHE_VERSION = 12;
|
||||
|
|
Loading…
Reference in New Issue