From 7d08a54ad9412d92757c5c4193b8f4c24e212d95 Mon Sep 17 00:00:00 2001
From: Stenzek <stenzek@gmail.com>
Date: Tue, 31 Jan 2023 20:50:45 +1000
Subject: [PATCH] GS/HW: Optimize TC source size based on CLAMP

---
 bin/resources/shaders/dx11/tfx.fx             |  39 +-
 .../shaders/opengl/common_header.glsl         |   8 +-
 bin/resources/shaders/opengl/tfx_fs.glsl      |  33 +-
 bin/resources/shaders/vulkan/tfx.glsl         |  40 +-
 pcsx2/GS/GSDrawingContext.cpp                 |  49 +--
 pcsx2/GS/GSDrawingContext.h                   |   9 -
 pcsx2/GS/GSRegs.h                             |   1 +
 pcsx2/GS/GSState.cpp                          |   8 +-
 pcsx2/GS/Renderers/Common/GSDevice.h          |   5 +-
 pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp     |   3 +-
 pcsx2/GS/Renderers/DX12/GSDevice12.cpp        |   3 +-
 pcsx2/GS/Renderers/HW/GSRendererHW.cpp        |  81 ++--
 pcsx2/GS/Renderers/HW/GSTextureCache.cpp      | 351 ++++++++++++------
 pcsx2/GS/Renderers/HW/GSTextureCache.h        |  58 ++-
 .../GS/Renderers/HW/GSTextureReplacements.cpp | 130 +++++--
 pcsx2/GS/Renderers/HW/GSTextureReplacements.h |   3 +-
 pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm       |   5 +-
 pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h  |  11 +-
 pcsx2/GS/Renderers/Metal/tfx.metal            |  28 +-
 pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp     |   3 +-
 pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp      |   3 +-
 pcsx2/ShaderCacheVersion.h                    |   2 +-
 22 files changed, 554 insertions(+), 319 deletions(-)

diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx
index 518185aade..ab15f78c95 100644
--- a/bin/resources/shaders/dx11/tfx.fx
+++ b/bin/resources/shaders/dx11/tfx.fx
@@ -21,6 +21,8 @@
 #define PS_FST 0
 #define PS_WMS 0
 #define PS_WMT 0
+#define PS_ADJS 0
+#define PS_ADJT 0
 #define PS_AEM_FMT FMT_32
 #define PS_AEM 0
 #define PS_TFX 0
@@ -42,7 +44,6 @@
 #define PS_CHANNEL_FETCH 0
 #define PS_TALES_OF_ABYSS_HLE 0
 #define PS_URBAN_CHAOS_HLE 0
-#define PS_INVALID_TEX0 0
 #define PS_SCALE_FACTOR 1.0
 #define PS_HDR 0
 #define PS_COLCLIP 0
@@ -158,10 +159,10 @@ cbuffer cb1
 	float2 TA;
 	float MaxDepthPS;
 	float Af;
-	uint4 MskFix;
 	uint4 FbMask;
 	float4 HalfTexel;
 	float4 MinMax;
+	float4 STRange;
 	int4 ChannelShuffle;
 	float2 TC_OffsetHack;
 	float2 STScale;
@@ -183,7 +184,20 @@ float4 sample_c(float2 uv, float uv_w)
 		// As of 2018 this issue is still present.
 		uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
 	}
+#if !PS_ADJS && !PS_ADJT
 	uv *= STScale;
+#else
+	#if PS_ADJS
+		uv.x = (uv.x - STRange.x) * STRange.z;
+	#else
+		uv.x = uv.x * STScale.x;
+	#endif
+	#if PS_ADJT
+		uv.y = (uv.y - STRange.y) * STRange.w;
+	#else
+		uv.y = uv.y * STScale.y;
+	#endif
+#endif
 
 #if PS_AUTOMATIC_LOD == 1
 	return Texture.Sample(TextureSampler, uv);
@@ -218,12 +232,7 @@ float4 sample_p_norm(float u)
 
 float4 clamp_wrap_uv(float4 uv)
 {
-	float4 tex_size;
-
-	if (PS_INVALID_TEX0 == 1)
-		tex_size = WH.zwzw;
-	else
-		tex_size = WH.xyxy;
+	float4 tex_size = WH.xyxy;
 
 	if(PS_WMS == PS_WMT)
 	{
@@ -238,7 +247,7 @@ float4 clamp_wrap_uv(float4 uv)
 			// textures. Fixes Xenosaga's hair issue.
 			uv = frac(uv);
 			#endif
-			uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
+			uv = (float4)(((uint4)(uv * tex_size) & asuint(MinMax.xyxy)) | asuint(MinMax.zwzw)) / tex_size;
 		}
 	}
 	else
@@ -252,7 +261,7 @@ float4 clamp_wrap_uv(float4 uv)
 			#if PS_FST == 0
 			uv.xz = frac(uv.xz);
 			#endif
-			uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
+			uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & asuint(MinMax.xx)) | asuint(MinMax.zz)) / tex_size.xx;
 		}
 		if(PS_WMT == 2)
 		{
@@ -263,7 +272,7 @@ float4 clamp_wrap_uv(float4 uv)
 			#if PS_FST == 0
 			uv.yw = frac(uv.yw);
 			#endif
-			uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
+			uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & asuint(MinMax.yy)) | asuint(MinMax.ww)) / tex_size.yy;
 		}
 	}
 
@@ -353,7 +362,7 @@ float4 fetch_c(int2 uv)
 
 int2 clamp_wrap_uv_depth(int2 uv)
 {
-	int4 mask = (int4)MskFix << 4;
+	int4 mask = asint(MinMax) << 4;
 	if (PS_WMS == PS_WMT)
 	{
 		if (PS_WMS == 2)
@@ -676,11 +685,7 @@ float4 fog(float4 c, float f)
 
 float4 ps_color(PS_INPUT input)
 {
-#if PS_FST == 0 && PS_INVALID_TEX0 == 1
-	// Re-normalize coordinate from invalid GS to corrected texture size
-	float2 st = (input.t.xy * WH.xy) / (input.t.w * WH.zw);
-	float2 st_int = (input.ti.zw * WH.xy) / (input.t.w * WH.zw);
-#elif PS_FST == 0
+#if PS_FST == 0
 	float2 st = input.t.xy / input.t.w;
 	float2 st_int = input.ti.zw / input.t.w;
 #else
diff --git a/bin/resources/shaders/opengl/common_header.glsl b/bin/resources/shaders/opengl/common_header.glsl
index ac08be64d6..9a7ce8589c 100644
--- a/bin/resources/shaders/opengl/common_header.glsl
+++ b/bin/resources/shaders/opengl/common_header.glsl
@@ -75,13 +75,12 @@ layout(std140, binding = 0) uniform cb21
     float MaxDepthPS;
     float Af;
 
-    uvec4 MskFix;
-
     uvec4 FbMask;
 
     vec4 HalfTexel;
 
     vec4 MinMax;
+    vec4 STRange;
 
     ivec4 ChannelShuffle;
 
@@ -92,11 +91,6 @@ layout(std140, binding = 0) uniform cb21
 };
 #endif
 
-//layout(std140, binding = 22) uniform cb22
-//{
-//    vec4 rt_size;
-//};
-
 //////////////////////////////////////////////////////////////////////
 // Default Sampler
 //////////////////////////////////////////////////////////////////////
diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl
index 669cd34448..2f0a82aef2 100644
--- a/bin/resources/shaders/opengl/tfx_fs.glsl
+++ b/bin/resources/shaders/opengl/tfx_fs.glsl
@@ -109,7 +109,20 @@ vec4 sample_c(vec2 uv)
     // As of 2018 this issue is still present.
     uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
 #endif
-    uv *= STScale;
+#if !PS_ADJS && !PS_ADJT
+	uv *= STScale;
+#else
+	#if PS_ADJS
+		uv.x = (uv.x - STRange.x) * STRange.z;
+	#else
+		uv.x = uv.x * STScale.x;
+	#endif
+	#if PS_ADJT
+		uv.y = (uv.y - STRange.y) * STRange.w;
+	#else
+		uv.y = uv.y * STScale.y;
+	#endif
+#endif
 
 #if PS_AUTOMATIC_LOD == 1
     return texture(TextureSampler, uv);
@@ -146,11 +159,7 @@ vec4 sample_p_norm(float u)
 vec4 clamp_wrap_uv(vec4 uv)
 {
     vec4 uv_out = uv;
-#if PS_INVALID_TEX0 == 1
-    vec4 tex_size = WH.zwzw;
-#else
     vec4 tex_size = WH.xyxy;
-#endif
 
 #if PS_WMS == PS_WMT
 
@@ -162,7 +171,7 @@ vec4 clamp_wrap_uv(vec4 uv)
     // textures. Fixes Xenosaga's hair issue.
     uv = fract(uv);
     #endif
-    uv_out = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
+    uv_out = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size;
 #endif
 
 #else // PS_WMS != PS_WMT
@@ -174,7 +183,7 @@ vec4 clamp_wrap_uv(vec4 uv)
     #if PS_FST == 0
     uv.xz = fract(uv.xz);
     #endif
-    uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
+    uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx;
 
 #endif
 
@@ -185,7 +194,7 @@ vec4 clamp_wrap_uv(vec4 uv)
     #if PS_FST == 0
     uv.yw = fract(uv.yw);
     #endif
-    uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
+    uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy;
 #endif
 
 #endif
@@ -288,7 +297,7 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv)
 
     // Keep the full precision
     // It allow to multiply the ScalingFactor before the 1/16 coeff
-    ivec4 mask = ivec4(MskFix) << 4;
+    ivec4 mask = floatBitsToInt(MinMax) << 4;
 
 #if PS_WMS == PS_WMT
 
@@ -591,11 +600,7 @@ void fog(inout vec4 C, float f)
 vec4 ps_color()
 {
     //FIXME: maybe we can set gl_Position.w = q in VS
-#if (PS_FST == 0) && (PS_INVALID_TEX0 == 1)
-    // Re-normalize coordinate from invalid GS to corrected texture size
-    vec2 st = (PSin.t_float.xy * WH.xy) / (vec2(PSin.t_float.w) * WH.zw);
-    vec2 st_int = (PSin.t_int.zw * WH.xy) / (vec2(PSin.t_float.w) * WH.zw);
-#elif (PS_FST == 0)
+#if (PS_FST == 0)
     vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);
     vec2 st_int = PSin.t_int.zw / vec2(PSin.t_float.w);
 #else
diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl
index 1f588a804e..bdb336f0eb 100644
--- a/bin/resources/shaders/vulkan/tfx.glsl
+++ b/bin/resources/shaders/vulkan/tfx.glsl
@@ -312,6 +312,8 @@ void main()
 #define PS_FST 0
 #define PS_WMS 0
 #define PS_WMT 0
+#define PS_ADJS 0
+#define PS_ADJT 0
 #define PS_FMT FMT_32
 #define PS_AEM 0
 #define PS_TFX 0
@@ -332,7 +334,6 @@ void main()
 #define PS_CHANNEL_FETCH 0
 #define PS_TALES_OF_ABYSS_HLE 0
 #define PS_URBAN_CHAOS_HLE 0
-#define PS_INVALID_TEX0 0
 #define PS_SCALE_FACTOR 1.0
 #define PS_HDR 0
 #define PS_COLCLIP 0
@@ -361,10 +362,10 @@ layout(std140, set = 0, binding = 1) uniform cb1
 	vec2 TA;
 	float MaxDepthPS;
 	float Af;
-	uvec4 MskFix;
 	uvec4 FbMask;
 	vec4 HalfTexel;
 	vec4 MinMax;
+	vec4 STRange;
 	ivec4 ChannelShuffle;
 	vec2 TC_OffsetHack;
 	vec2 STScale;
@@ -420,7 +421,20 @@ vec4 sample_c(vec2 uv)
 		// As of 2018 this issue is still present.
 		uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
 #endif
+#if !PS_ADJS && !PS_ADJT
 	uv *= STScale;
+#else
+	#if PS_ADJS
+		uv.x = (uv.x - STRange.x) * STRange.z;
+	#else
+		uv.x = uv.x * STScale.x;
+	#endif
+	#if PS_ADJT
+		uv.y = (uv.y - STRange.y) * STRange.w;
+	#else
+		uv.y = uv.y * STScale.y;
+	#endif
+#endif
 
 #if PS_AUTOMATIC_LOD == 1
     return texture(Texture, uv);
@@ -455,13 +469,7 @@ vec4 sample_p_norm(float u)
 
 vec4 clamp_wrap_uv(vec4 uv)
 {
-	vec4 tex_size;
-
-	#if PS_INVALID_TEX0
-		tex_size = WH.zwzw;
-	#else
-		tex_size = WH.xyxy;
-	#endif
+	vec4 tex_size = WH.xyxy;
 
 	#if PS_WMS == PS_WMT
 	{
@@ -476,7 +484,7 @@ vec4 clamp_wrap_uv(vec4 uv)
 			// textures. Fixes Xenosaga's hair issue.
 			uv = fract(uv);
 			#endif
-			uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
+			uv = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size;
 		}
 		#endif
 	}
@@ -491,7 +499,7 @@ vec4 clamp_wrap_uv(vec4 uv)
 			#if PS_FST == 0
 			uv.xz = fract(uv.xz);
 			#endif
-			uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
+			uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx;
 		}
 		#endif
 		#if PS_WMT == 2
@@ -503,7 +511,7 @@ vec4 clamp_wrap_uv(vec4 uv)
 			#if PS_FST == 0
 			uv.yw = fract(uv.yw);
 			#endif
-			uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
+			uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy;
 		}
 		#endif
 	}
@@ -590,7 +598,7 @@ vec4 fetch_c(ivec2 uv)
 
 ivec2 clamp_wrap_uv_depth(ivec2 uv)
 {
-	ivec4 mask = ivec4(MskFix << 4);
+	ivec4 mask = floatBitsToInt(MinMax) << 4;
 	#if (PS_WMS == PS_WMT)
 	{
 		#if (PS_WMS == 2)
@@ -907,11 +915,7 @@ vec4 fog(vec4 c, float f)
 
 vec4 ps_color()
 {
-#if PS_FST == 0 && PS_INVALID_TEX0 == 1
-	// Re-normalize coordinate from invalid GS to corrected texture size
-	vec2 st = (vsIn.t.xy * WH.xy) / (vsIn.t.w * WH.zw);
-	vec2 st_int = (vsIn.ti.zw * WH.xy) / (vsIn.t.w * WH.zw);
-#elif PS_FST == 0
+#if PS_FST == 0
 	vec2 st = vsIn.t.xy / vsIn.t.w;
 	vec2 st_int = vsIn.ti.zw / vsIn.t.w;
 #else
diff --git a/pcsx2/GS/GSDrawingContext.cpp b/pcsx2/GS/GSDrawingContext.cpp
index 522185836a..011a664781 100644
--- a/pcsx2/GS/GSDrawingContext.cpp
+++ b/pcsx2/GS/GSDrawingContext.cpp
@@ -130,7 +130,7 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
 	res.TW = tw > 10 ? 0 : tw;
 	res.TH = th > 10 ? 0 : th;
 
-	if (GSConfig.Renderer == GSRendererType::SW && (TEX0.TW != res.TW || TEX0.TH != res.TH))
+	if (TEX0.TW != res.TW || TEX0.TH != res.TH)
 	{
 		GL_DBG("FixedTEX0 %05x %d %d tw %d=>%d th %d=>%d st (%.0f,%.0f,%.0f,%.0f) uvmax %d,%d wm %d,%d (%d,%d,%d,%d)",
 			(int)TEX0.TBP0, (int)TEX0.TBW, (int)TEX0.PSM,
@@ -142,50 +142,3 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
 
 	return res;
 }
-
-void GSDrawingContext::ComputeFixedTEX0(const GSVector4& st)
-{
-	// It is quite complex to handle rescaling so this function is less stricter than GetSizeFixedTEX0,
-	// therefore we remove the reduce optimization and we don't handle bilinear filtering which might create wrong interpolation at the border.
-	int tw = TEX0.TW;
-	int th = TEX0.TH;
-
-	int wms = (int)CLAMP.WMS;
-	int wmt = (int)CLAMP.WMT;
-
-	int minu = (int)CLAMP.MINU;
-	int minv = (int)CLAMP.MINV;
-	int maxu = (int)CLAMP.MAXU;
-	int maxv = (int)CLAMP.MAXV;
-
-	if (wms != CLAMP_REGION_CLAMP)
-		tw = tw > 10 ? 0 : tw;
-
-	if (wmt != CLAMP_REGION_CLAMP)
-		th = th > 10 ? 0 : th;
-
-	GSVector4i uv = GSVector4i(st.floor().xyzw(st.ceil()));
-
-	uv.x = findmax(uv.x, uv.z, (1 << tw) - 1, wms, minu, maxu);
-	uv.y = findmax(uv.y, uv.w, (1 << th) - 1, wmt, minv, maxv);
-
-	if (wms == CLAMP_REGION_CLAMP || wms == CLAMP_REGION_REPEAT)
-		tw = extend(uv.x, tw);
-
-	if (wmt == CLAMP_REGION_CLAMP || wmt == CLAMP_REGION_REPEAT)
-		th = extend(uv.y, th);
-
-	tw = std::clamp<int>(tw, 0, 10);
-	th = std::clamp<int>(th, 0, 10);
-
-	if ((tw != (int)TEX0.TW) || (th != (int)TEX0.TH))
-	{
-		m_fixed_tex0 = true;
-		TEX0.TW = tw;
-		TEX0.TH = th;
-
-		GL_DBG("FixedTEX0 TW %d=>%d, TH %d=>%d wm %d,%d",
-			(int)stack.TEX0.TW, (int)TEX0.TW, (int)stack.TEX0.TH, (int)TEX0.TH,
-			(int)CLAMP.WMS, (int)CLAMP.WMT);
-	}
-}
diff --git a/pcsx2/GS/GSDrawingContext.h b/pcsx2/GS/GSDrawingContext.h
index dedba2910c..2122880383 100644
--- a/pcsx2/GS/GSDrawingContext.h
+++ b/pcsx2/GS/GSDrawingContext.h
@@ -69,12 +69,8 @@ public:
 		GIFRegZBUF     ZBUF;
 	} stack;
 
-	bool m_fixed_tex0;
-
 	GSDrawingContext()
 	{
-		m_fixed_tex0 = false;
-
 		memset(&offset, 0, sizeof(offset));
 
 		Reset();
@@ -140,8 +136,6 @@ public:
 	}
 
 	GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const;
-	void ComputeFixedTEX0(const GSVector4& st);
-	bool HasFixedTEX0() const { return m_fixed_tex0; }
 
 	// Save & Restore before/after draw allow to correct/optimize current register for current draw
 	// Note: we could avoid the restore part if all renderer code is updated to use a local copy instead
@@ -159,9 +153,6 @@ public:
 		stack.FBA = FBA;
 		stack.FRAME = FRAME;
 		stack.ZBUF = ZBUF;
-
-		// This function is called before the draw so take opportunity to reset m_fixed_tex0
-		m_fixed_tex0 = false;
 	}
 
 	void RestoreReg()
diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h
index 1ab1aa12cd..4076141964 100644
--- a/pcsx2/GS/GSRegs.h
+++ b/pcsx2/GS/GSRegs.h
@@ -823,6 +823,7 @@ union
 REG_END2
 	__forceinline bool IsRepeating() const
 	{
+		// This is actually "does the texture span more than one page".
 		if (TBW < 2)
 		{
 			if (PSM == PSM_PSMT8)
diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp
index 5077d94f3a..0fda71972e 100644
--- a/pcsx2/GS/GSState.cpp
+++ b/pcsx2/GS/GSState.cpp
@@ -1699,7 +1699,6 @@ inline void GSState::CopyEnv(GSDrawingEnvironment* dest, GSDrawingEnvironment* s
 {
 	memcpy(dest, src, 88);
 	memcpy(&dest->CTXT[ctx], &src->CTXT[ctx], 96);
-	dest->CTXT[ctx].m_fixed_tex0 = src->CTXT[ctx].m_fixed_tex0;
 }
 
 void GSState::Flush(GSFlushReason reason)
@@ -3583,8 +3582,11 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, c
 
 	const int minu = (int)CLAMP.MINU;
 	const int minv = (int)CLAMP.MINV;
-	const int maxu = (int)CLAMP.MAXU;
-	const int maxv = (int)CLAMP.MAXV;
+
+	// For the FixedTEX0 case, in hardware, we handle this in the texture cache. Don't OR the bits in here, otherwise
+	// we'll end up with an invalid rectangle, we want the passed-in rectangle to be relative to the normalized size.
+	const int maxu = (wms != CLAMP_REGION_REPEAT || (int)CLAMP.MAXU < w) ? (int)CLAMP.MAXU : 0;
+	const int maxv = (wmt != CLAMP_REGION_REPEAT || (int)CLAMP.MAXV < h) ? (int)CLAMP.MAXV : 0;
 
 	GSVector4i vr = tr;
 
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h
index 74357290c4..5b536207e1 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.h
+++ b/pcsx2/GS/Renderers/Common/GSDevice.h
@@ -309,6 +309,8 @@ struct alignas(16) GSHWDrawConfig
 				u32 tcc : 1;
 				u32 wms : 2;
 				u32 wmt : 2;
+				u32 adjs : 1;
+				u32 adjt : 1;
 				u32 ltf : 1;
 				// Shuffle and fbmask effect
 				u32 shuffle  : 1;
@@ -352,7 +354,6 @@ struct alignas(16) GSHWDrawConfig
 				u32 automatic_lod : 1;
 				u32 manual_lod : 1;
 				u32 point_sampler : 1;
-				u32 invalid_tex0 : 1; // Lupin the 3rd
 
 				// Scan mask
 				u32 scanmsk : 2;
@@ -554,11 +555,11 @@ struct alignas(16) GSHWDrawConfig
 		GSVector4 FogColor_AREF;
 		GSVector4 WH;
 		GSVector4 TA_MaxDepth_Af;
-		GSVector4i MskFix;
 		GSVector4i FbMask;
 
 		GSVector4 HalfTexel;
 		GSVector4 MinMax;
+		GSVector4 STRange;
 		GSVector4i ChannelShuffle;
 		GSVector2 TCOffsetHack;
 		GSVector2 STScale;
diff --git a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp
index 60c3621568..fafe075d88 100644
--- a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp
+++ b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp
@@ -142,6 +142,8 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
 		sm.AddMacro("PS_FST", sel.fst);
 		sm.AddMacro("PS_WMS", sel.wms);
 		sm.AddMacro("PS_WMT", sel.wmt);
+		sm.AddMacro("PS_ADJS", sel.adjs);
+		sm.AddMacro("PS_ADJT", sel.adjt);
 		sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
 		sm.AddMacro("PS_AEM", sel.aem);
 		sm.AddMacro("PS_TFX", sel.tfx);
@@ -164,7 +166,6 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
 		sm.AddMacro("PS_DFMT", sel.dfmt);
 		sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
 		sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
-		sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0);
 		sm.AddMacro("PS_HDR", sel.hdr);
 		sm.AddMacro("PS_COLCLIP", sel.colclip);
 		sm.AddMacro("PS_BLEND_A", sel.blend_a);
diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp
index 0b24c4c735..e67fd9bde5 100644
--- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp
+++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp
@@ -1483,6 +1483,8 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
 	sm.AddMacro("PS_FST", sel.fst);
 	sm.AddMacro("PS_WMS", sel.wms);
 	sm.AddMacro("PS_WMT", sel.wmt);
+	sm.AddMacro("PS_ADJS", sel.adjs);
+	sm.AddMacro("PS_ADJT", sel.adjt);
 	sm.AddMacro("PS_AEM_FMT", sel.aem_fmt);
 	sm.AddMacro("PS_AEM", sel.aem);
 	sm.AddMacro("PS_TFX", sel.tfx);
@@ -1505,7 +1507,6 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
 	sm.AddMacro("PS_DFMT", sel.dfmt);
 	sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
 	sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
-	sm.AddMacro("PS_INVALID_TEX0", sel.invalid_tex0);
 	sm.AddMacro("PS_HDR", sel.hdr);
 	sm.AddMacro("PS_COLCLIP", sel.colclip);
 	sm.AddMacro("PS_BLEND_A", sel.blend_a);
diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
index e5ae60f811..bb94c21ab9 100644
--- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp
@@ -1279,10 +1279,6 @@ void GSRendererHW::Draw()
 		return;
 	}
 
-	// Fix TEX0 size
-	if (PRIM->TME && !IsMipMapActive())
-		m_context->ComputeFixedTEX0(m_vt.m_min.t.xyxy(m_vt.m_max.t));
-
 	// skip alpha test if possible
 	// Note: do it first so we know if frame/depth writes are masked
 
@@ -1528,8 +1524,8 @@ void GSRendererHW::Draw()
 
 		TextureMinMaxResult tmm = GetTextureMinMax(TEX0, MIP_CLAMP, m_vt.IsLinear());
 
-		m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, tmm.coverage) :
-			m_tc->LookupSource(TEX0, env.TEXA, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic ||
+		m_src = tex_psm.depth ? m_tc->LookupDepthSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage) :
+			m_tc->LookupSource(TEX0, env.TEXA, MIP_CLAMP, tmm.coverage, (GSConfig.HWMipmap >= HWMipmapLevel::Basic ||
 				GSConfig.TriFilter == TriFiltering::Forced) ? &hash_lod_range : nullptr);
 
 		// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
@@ -1642,7 +1638,7 @@ void GSRendererHW::Draw()
 
 			for (int layer = m_lod.x + 1; layer <= m_lod.y; layer++)
 			{
-				const GIFRegTEX0& MIP_TEX0 = GetTex0Layer(layer);
+				const GIFRegTEX0 MIP_TEX0(GetTex0Layer(layer));
 
 				m_context->offset.tex = m_mem.GetOffset(MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM);
 
@@ -3105,6 +3101,26 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
 	}
 }
 
+__ri static constexpr bool IsRedundantClamp(u8 clamp, u32 clamp_min, u32 clamp_max, u32 tsize)
+{
+	// A region mode which covers exactly the whole texture does nothing, so it does not need
+	// to be sampled in the shader. That way trilinear etc still works.
+	const u32 last_texel = (1u << tsize) - 1u;
+	if (clamp == CLAMP_REGION_REPEAT)
+		return (clamp_min == last_texel && clamp_max == 0);
+	if (clamp == CLAMP_REGION_CLAMP)
+		return (clamp_max == last_texel && clamp_min == 0);
+
+	return false;
+}
+
+__ri static constexpr u8 EffectiveClamp(u8 clamp, bool has_region)
+{
+	// Once the region has been extracted into its own texture, the hardware sampler can wrap for us.
+	// Flipping the low two bits maps REGION_CLAMP (2) -> CLAMP (1) and REGION_REPEAT (3) -> REPEAT (0).
+	return (has_region && clamp >= CLAMP_REGION_CLAMP) ? static_cast<u8>(clamp ^ 3) : clamp;
+}
+
 void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
 {
 	// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
@@ -3112,9 +3128,16 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
 	const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
 
-	const u8 wms = m_context->CLAMP.WMS;
-	const u8 wmt = m_context->CLAMP.WMT;
+	static constexpr const char* clamp_modes[] = { "REPEAT", "CLAMP", "REGION_CLAMP", "REGION_REPEAT" };
+	const bool redundant_wms = IsRedundantClamp(m_context->CLAMP.WMS, m_context->CLAMP.MINU, m_context->CLAMP.MAXU, tex->m_TEX0.TW);
+	const bool redundant_wmt = IsRedundantClamp(m_context->CLAMP.WMT, m_context->CLAMP.MINV, m_context->CLAMP.MAXV, tex->m_TEX0.TH);
+	const u8 wms = EffectiveClamp(m_context->CLAMP.WMS, tex->m_region.HasX() || redundant_wms); // full-size region == plain clamp/repeat
+	const u8 wmt = EffectiveClamp(m_context->CLAMP.WMT, tex->m_region.HasY() || redundant_wmt); // so let the hardware sampler handle it
 	const bool complex_wms_wmt = !!((wms | wmt) & 2);
+	GL_CACHE("WMS: %s [%s%s] WMT: %s [%s%s] Complex: %d MINU: %d MINV: %d MAXU: %d MAXV: %d",
+		clamp_modes[m_context->CLAMP.WMS], redundant_wms ? "redundant," : "", clamp_modes[wms],
+		clamp_modes[m_context->CLAMP.WMT], redundant_wmt ? "redundant," : "", clamp_modes[wmt],
+		complex_wms_wmt, m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
 
 	const bool need_mipmap = IsMipMapDraw();
 	const bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt || psm.depth;
@@ -3290,14 +3313,38 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
 	const GSVector4 st_scale = WH.zwzw() / GSVector4(w, h).xyxy();
 	m_conf.cb_ps.STScale = GSVector2(st_scale.x, st_scale.y);
 
+	if (tex->m_region.HasX())
+	{
+		m_conf.cb_ps.STRange.x = static_cast<float>(tex->m_region.GetMinX()) / static_cast<float>(miptw);
+		m_conf.cb_ps.STRange.z = static_cast<float>(miptw) / static_cast<float>(tex->m_region.GetWidth());
+		m_conf.ps.adjs = 1;
+	}
+	if (tex->m_region.HasY())
+	{
+		m_conf.cb_ps.STRange.y = static_cast<float>(tex->m_region.GetMinY()) / static_cast<float>(mipth);
+		m_conf.cb_ps.STRange.w = static_cast<float>(mipth) / static_cast<float>(tex->m_region.GetHeight());
+		m_conf.ps.adjt = 1;
+	}
+
 	m_conf.ps.fst = !!PRIM->FST;
 
 	m_conf.cb_ps.WH = WH;
 	m_conf.cb_ps.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
 	if (complex_wms_wmt)
 	{
-		m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);;
-		m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy();
+		const GSVector4i clamp(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
+		const GSVector4 region_repeat(GSVector4::cast(clamp));
+		const GSVector4 region_clamp(GSVector4(clamp) / WH.xyxy());
+		if (wms >= CLAMP_REGION_CLAMP)
+		{
+			m_conf.cb_ps.MinMax.x = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.x : region_repeat.x;
+			m_conf.cb_ps.MinMax.z = (wms == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.z : region_repeat.z;
+		}
+		if (wmt >= CLAMP_REGION_CLAMP)
+		{
+			m_conf.cb_ps.MinMax.y = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.y : region_repeat.y;
+			m_conf.cb_ps.MinMax.w = (wmt == CLAMP_REGION_CLAMP && !m_conf.ps.depth_fmt) ? region_clamp.w : region_repeat.w;
+		}
 	}
 	else if (trilinear_manual)
 	{
@@ -3318,18 +3365,6 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex)
 	m_conf.cb_ps.TCOffsetHack = GSVector2(tc_oh_ts.z, tc_oh_ts.w);
 	m_conf.cb_vs.texture_scale = GSVector2(tc_oh_ts.x, tc_oh_ts.y);
 
-	// Must be done after all coordinates math
-	if (m_context->HasFixedTEX0() && !PRIM->FST)
-	{
-		m_conf.ps.invalid_tex0 = 1;
-		// Use invalid size to denormalize ST coordinate
-		m_conf.cb_ps.WH.x = static_cast<float>(1 << m_context->stack.TEX0.TW);
-		m_conf.cb_ps.WH.y = static_cast<float>(1 << m_context->stack.TEX0.TH);
-
-		// We can't handle m_target with invalid_tex0 atm due to upscaling
-		ASSERT(!tex->m_target);
-	}
-
 	// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
 	m_conf.sampler.tau = (wms != CLAMP_CLAMP);
 	m_conf.sampler.tav = (wmt != CLAMP_CLAMP);
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
index a6c4c33040..56d2b564f0 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
@@ -119,7 +119,7 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm
 		target->m_dirty.push_back(GSDirtyRect(rect, psm, bw));
 }
 
-GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette)
+GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette)
 {
 	if (GSConfig.UserHacks_DisableDepthSupport)
 	{
@@ -177,7 +177,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
 			TEX0.TBP0, psm_str(psm));
 
 		// Create a shared texture source
-		src = new Source(TEX0, TEXA, true);
+		src = new Source(TEX0, TEXA);
 		src->m_texture = dst->m_texture;
 		src->m_shared_texture = true;
 		src->m_target = true; // So renderer can check if a conversion is required
@@ -201,7 +201,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
 	else if (g_gs_renderer->m_game.title == CRC::SVCChaos || g_gs_renderer->m_game.title == CRC::KOF2002)
 	{
 		// SVCChaos black screen & KOF2002 blue screen on main menu, regardless of depth enabled or disabled.
-		return LookupSource(TEX0, TEXA, r, nullptr);
+		return LookupSource(TEX0, TEXA, CLAMP, r, nullptr);
 	}
 	else
 	{
@@ -227,24 +227,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
 	return src;
 }
 
-GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod)
+__ri static GSTextureCache::Source* FindSourceInMap(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
+	const GSLocalMemory::psm_t& psm_s, const u32* clut, const GSTexture* gpu_clut, const GSVector2i& compare_lod,
+	const GSTextureCache::SourceRegion& region, FastList<GSTextureCache::Source*>& map)
 {
-	GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP);
-
-	const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
-	//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
-
-	const u32* const clut = g_gs_renderer->m_mem.m_clut;
-	GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
-
-	Source* src = NULL;
-
-	auto& m = m_src.m_map[TEX0.TBP0 >> 5];
-
-	const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0));
-	for (auto i = m.begin(); i != m.end(); ++i)
+	for (auto i = map.begin(); i != map.end(); ++i)
 	{
-		Source* s = *i;
+		GSTextureCache::Source* s = *i;
 
 		if (((TEX0.U32[0] ^ s->m_TEX0.U32[0]) | ((TEX0.U32[1] ^ s->m_TEX0.U32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
 			continue;
@@ -272,20 +261,92 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 					continue;
 			}
 
+			if (s->m_region.bits != 0 && s->m_region.bits != region.bits)
+				continue;
+
 			// Same base mip texture, but we need to check that MXL was the same as well.
 			// When mipmapping is off, this will be 0,0 vs 0,0.
 			if (s->m_lod != compare_lod)
 				continue;
 		}
 
-		m.MoveFront(i.Index());
-
-		src = s;
-
-		break;
+		map.MoveFront(i.Index());
+		return s;
 	}
 
-	Target* dst = NULL;
+	return nullptr;
+}
+
+GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod)
+{
+	GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x, TW: %d, TH: %d)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP, 1 << TEX0.TW, 1 << TEX0.TH);
+
+	const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
+	//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
+
+	const u32* const clut = g_gs_renderer->m_mem.m_clut;
+	GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
+
+	SourceRegion region = {};
+	if (CLAMP.WMS == CLAMP_REGION_CLAMP && CLAMP.MAXU >= CLAMP.MINU)
+	{
+		// Another Lupin case here, it uses region clamp with UV (not ST), puts a clamp region further
+		// into the texture, but a smaller TW/TH. Catch this by looking for a clamp range above TW.
+		const u32 rw = CLAMP.MAXU - CLAMP.MINU + 1; // width of the inclusive [MINU, MAXU] range
+		if (rw < (1u << TEX0.TW) || CLAMP.MAXU >= (1u << TEX0.TW))
+		{
+			region.SetX(CLAMP.MINU, CLAMP.MAXU + 1);
+			GL_CACHE("TC: Region clamp optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth());
+		}
+	}
+	else if (CLAMP.WMS == CLAMP_REGION_REPEAT && CLAMP.MINU != 0)
+	{
+		// Lupin the 3rd is really evil, it sets TW/TH to the texture size, but then uses region repeat
+		// to offset the actual texture data to elsewhere. So, we'll just force any cases like this down
+		// the region texture path.
+		const u32 rw = ((CLAMP.MINU | CLAMP.MAXU) - CLAMP.MAXU) + 1;
+		if (rw < (1u << TEX0.TW) || CLAMP.MAXU != 0)
+		{
+			region.SetX(CLAMP.MAXU, (CLAMP.MINU | CLAMP.MAXU) + 1);
+			GL_CACHE("TC: Region repeat optimization: %d width -> %d", 1 << TEX0.TW, region.GetWidth());
+		}
+	}
+	if (CLAMP.WMT == CLAMP_REGION_CLAMP && CLAMP.MAXV >= CLAMP.MINV)
+	{
+		const u32 rh = CLAMP.MAXV - CLAMP.MINV + 1; // height of the inclusive [MINV, MAXV] range
+		if (rh < (1u << TEX0.TH) || CLAMP.MAXV >= (1u << TEX0.TH))
+		{
+			region.SetY(CLAMP.MINV, CLAMP.MAXV + 1);
+			GL_CACHE("TC: Region clamp optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight());
+		}
+	}
+	else if (CLAMP.WMT == CLAMP_REGION_REPEAT && CLAMP.MINV != 0)
+	{
+		const u32 rh = ((CLAMP.MINV | CLAMP.MAXV) - CLAMP.MAXV) + 1;
+		if (rh < (1u << TEX0.TH) || CLAMP.MAXV != 0)
+		{
+			region.SetY(CLAMP.MAXV, (CLAMP.MINV | CLAMP.MAXV) + 1);
+			GL_CACHE("TC: Region repeat optimization: %d height -> %d", 1 << TEX0.TH, region.GetHeight());
+		}
+	}
+
+	const GSVector2i compare_lod(lod ? *lod : GSVector2i(0, 0));
+	Source* src = nullptr;
+
+	// Region textures might be placed in a different page, so check that first.
+	const u32 lookup_page = TEX0.TBP0 >> 5;
+	if (region.GetMinX() != 0 || region.GetMinY() != 0)
+	{
+		const GSOffset offset(psm_s.info, TEX0.TBP0, TEX0.TBW, TEX0.PSM);
+		const u32 region_page = offset.bn(region.GetMinX(), region.GetMinY()) >> 5;
+		if (lookup_page != region_page)
+			src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[region_page]);
+	}
+	if (!src)
+		src = FindSourceInMap(TEX0, TEXA, psm_s, clut, gpu_clut, compare_lod, region, m_src.m_map[lookup_page]);
+
+
+	Target* dst = nullptr;
 	bool half_right = false;
 	int x_offset = 0;
 	int y_offset = 0;
@@ -293,7 +354,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 #ifdef DISABLE_HW_TEXTURE_CACHE
 	if (0)
 #else
-	if (src == NULL)
+	if (!src)
 #endif
 	{
 		const u32 bp = TEX0.TBP0;
@@ -466,11 +527,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 						GIFRegTEX0 depth_TEX0;
 						depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
 						depth_TEX0.U32[1] = TEX0.U32[1];
-						return LookupDepthSource(depth_TEX0, TEXA, r);
+						return LookupDepthSource(depth_TEX0, TEXA, CLAMP, r);
 					}
 					else
 					{
-						return LookupDepthSource(TEX0, TEXA, r, true);
+						return LookupDepthSource(TEX0, TEXA, CLAMP, r, true);
 					}
 				}
 			}
@@ -496,7 +557,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 			GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
 		}
 #endif
-		src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut);
+		src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region);
 	}
 	else
 	{
@@ -1893,13 +1954,13 @@ void GSTextureCache::IncAge()
 }
 
 //Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
-GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut)
+GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region)
 {
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
-	Source* src = new Source(TEX0, TEXA, false);
+	Source* src = new Source(TEX0, TEXA);
 
-	const int tw = 1 << TEX0.TW;
-	const int th = 1 << TEX0.TH;
+	int tw = 1 << TEX0.TW;
+	int th = 1 << TEX0.TH;
 	//int tp = TEX0.TBW << 6;
 	int tlevels = 1;
 	if (lod)
@@ -2211,8 +2272,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 		bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0) || gpu_clut;
 		const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
 
+		// adjust texture size to fit
+		src->m_region = region;
+		tw = region.HasX() ? region.GetWidth() : tw;
+		th = region.HasY() ? region.GetHeight() : th;
+
 		// try the hash cache
-		if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr)
+		if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr)
 		{
 			src->m_texture = src->m_from_hash_cache->texture;
 			if (gpu_clut)
@@ -2245,6 +2311,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 	ASSERT(src->m_from_target == (dst ? &dst->m_texture : nullptr));
 	ASSERT(src->m_texture->GetScale() == ((!dst || TEX0.PSM == PSM_PSMT8) ? GSVector2(1, 1) : dst->m_texture->GetScale()));
 
+	src->SetPages();
+
 	m_src.Add(src, TEX0, g_gs_renderer->m_context->offset.tex);
 
 	return src;
@@ -2253,7 +2321,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 // This really needs a better home...
 extern bool FMVstarted;
 
-GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod)
+GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region)
 {
 	// don't bother hashing if we're not dumping or replacing.
 	const bool dump = GSConfig.DumpReplaceableTextures && (!FMVstarted || GSConfig.DumpTexturesWithFMVActive) &&
@@ -2265,13 +2333,13 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 
 	// need the hash either for replacing, dumping or caching.
 	// if dumping/replacing is on, we compute the clut hash regardless, since replacements aren't indexed
-	HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod)};
+	HashCacheKey key{HashCacheKey::Create(TEX0, TEXA, (dump || replace || !paltex) ? clut : nullptr, lod, region)};
 
 	// handle dumping first, this is mostly isolated.
 	if (dump)
 	{
 		// dump base level
-		GSTextureReplacements::DumpTexture(key, TEX0, TEXA, g_gs_renderer->m_mem, 0);
+		GSTextureReplacements::DumpTexture(key, TEX0, TEXA, region, g_gs_renderer->m_mem, 0);
 
 		// and the mips
 		if (lod && GSConfig.DumpReplaceableMipmaps)
@@ -2281,7 +2349,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 			for (int mip = 1; mip < nmips; mip++)
 			{
 				const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)};
-				GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, g_gs_renderer->m_mem, mip);
+				GSTextureReplacements::DumpTexture(key, MIP_TEX0, TEXA, region, g_gs_renderer->m_mem, mip);
 			}
 		}
 	}
@@ -2355,8 +2423,8 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 		return nullptr;
 
 	// expand/upload texture
-	const int tw = 1 << TEX0.TW;
-	const int th = 1 << TEX0.TH;
+	const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
+	const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
 	const int tlevels = lod ? ((GSConfig.HWMipmap != HWMipmapLevel::Full) ? -1 : (lod->y - lod->x + 1)) : 1;
 	GSTexture* tex = g_gs_device->CreateTexture(tw, th, tlevels, paltex ? GSTexture::Format::UNorm8 : GSTexture::Format::Color);
 	if (!tex)
@@ -2366,7 +2434,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 	}
 
 	// upload base level
-	PreloadTexture(TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, 0);
+	PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0);
 
 	// upload mips if present
 	if (lod)
@@ -2376,7 +2444,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 		for (int mip = 1; mip < nmips; mip++)
 		{
 			const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)};
-			PreloadTexture(MIP_TEX0, TEXA, g_gs_renderer->m_mem, paltex, tex, mip);
+			PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip);
 		}
 	}
 
@@ -2649,12 +2717,13 @@ bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i
 
 // GSTextureCache::Source
 
-GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container)
+GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
 	: m_palette_obj(nullptr)
 	, m_palette(nullptr)
 	, m_valid_rect(0, 0)
 	, m_lod(0, 0)
 	, m_target(false)
+	, m_repeating(false)
 	, m_p2t(NULL)
 	, m_from_target(NULL)
 	, m_from_target_TEX0(TEX0)
@@ -2662,32 +2731,8 @@ GSTextureCache::Source::Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, b
 	m_TEX0 = TEX0;
 	m_TEXA = TEXA;
 
-	if (dummy_container)
-	{
-		// Dummy container only contain a m_texture that is a pointer to another source.
-
-		m_write.rect = NULL;
-		m_write.count = 0;
-
-		m_repeating = false;
-	}
-	else
-	{
-		memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
-		memset(m_layer_hash, 0, sizeof(m_layer_hash));
-
-		m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
-		m_write.count = 0;
-
-		m_repeating = m_TEX0.IsRepeating();
-
-		if (m_repeating && !CanPreload())
-		{
-			m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0);
-		}
-
-		m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
-	}
+	memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
+	memset(m_layer_hash, 0, sizeof(m_layer_hash));
 }
 
 GSTextureCache::Source::~Source()
@@ -2703,6 +2748,23 @@ GSTextureCache::Source::~Source()
 	}
 }
 
+void GSTextureCache::Source::SetPages() // Derives m_repeating, m_p2t and m_pages from m_TEX0, m_region and m_from_hash_cache.
+{
+	const int tw = 1 << m_TEX0.TW; // TW/TH hold the log2 of the texture dimensions
+	const int th = 1 << m_TEX0.TH;
+
+	m_repeating = !m_from_hash_cache && m_TEX0.IsRepeating() && !m_region.IsFixedTEX0(tw, th); // never for hash-cache sources or "fixed TEX0" regions
+
+	if (m_repeating && !CanPreload())
+	{
+		// TODO: wrong for lupin/invalid tex0
+		m_p2t = g_gs_renderer->m_mem.GetPage2TileMap(m_TEX0);
+	}
+
+	const GSVector4i rect(m_region.GetRect(tw, th)); // region extents on axes that have one, otherwise 0..tw / 0..th
+	m_pages = g_gs_renderer->m_context->offset.tex.pageLooperForRect(rect);
+}
+
 void GSTextureCache::Source::Update(const GSVector4i& rect, int level)
 {
 	Surface::UpdateAge();
@@ -2719,9 +2781,17 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int level)
 	const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
 	const int tw = 1 << m_TEX0.TW;
 	const int th = 1 << m_TEX0.TH;
-	const GSVector4i r = rect.ralign<Align_Outside>(bs);
 
-	if (r.eq(GSVector4i(0, 0, tw, th)))
+	GSVector4i r(rect);
+	const GSVector4i region_rect(m_region.GetRect(tw, th));
+
+	// Offset the pages we use by the clamp region.
+	if (m_region.HasEither())
+		r = (r + m_region.GetOffset(tw, th)).rintersect(region_rect);
+
+	r = r.ralign<Align_Outside>(bs);
+
+	if (region_rect.eq(m_region.HasEither() ? r.rintersect(region_rect) : r))
 		m_complete_layers |= (1u << level);
 
 	const GSOffset& off = g_gs_renderer->m_context->offset.tex;
@@ -2818,6 +2888,9 @@ void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4
 
 void GSTextureCache::Source::Write(const GSVector4i& r, int layer)
 {
+	if (!m_write.rect)
+		m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 32));
+
 	m_write.rect[m_write.count++] = r;
 
 	while (m_write.count >= 2)
@@ -2857,11 +2930,12 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
 	// However the function is never called for these cases.  This is just for information
 	// should someone wish to use this function for these cases later.
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
+	const SourceRegion region((layer == 0) ? m_region : m_region.AdjustForMipmap(layer));
 
-	const int tw = 1 << m_TEX0.TW;
-	const int th = 1 << m_TEX0.TH;
-
-	const GSVector4i tr(0, 0, tw, th);
+	// For the invalid tex0 case, the region might be larger than TEX0.TW/TH.
+	const int tw = std::max(region.GetWidth(), 1u << m_TEX0.TW);
+	const int th = std::max(region.GetHeight(), 1u << m_TEX0.TH);
+	const GSVector4i tex_r(region.GetRect(tw, th));
 
 	int pitch = std::max(tw, psm.bs.x) * sizeof(u32);
 
@@ -2877,35 +2951,33 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
 		rtx = psm.rtxP;
 	}
 
-	u8* buff = s_unswizzle_buffer;
-
 	for (u32 i = 0; i < count; i++)
 	{
-		const GSVector4i r = m_write.rect[i];
+		const GSVector4i r(m_write.rect[i]);
 
-		if ((r > tr).mask() & 0xff00)
-		{
-			rtx(mem, off, r, buff, pitch, m_TEXA);
-
-			m_texture->Update(r.rintersect(tr), buff, pitch, layer);
-		}
-		else
+		// if update rect lies to the left/above of the region rectangle, or extends past the texture bounds, we can't use a direct map
+		if (((r > tex_r).mask() & 0xff00) == 0 && ((tex_r > r).mask() & 0x00ff) == 0)
 		{
 			GSTexture::GSMap m;
-
-			if (m_texture->Map(m, &r, layer))
+			const GSVector4i map_r(r - tex_r.xyxy());
+			if (m_texture->Map(m, &map_r, layer))
 			{
 				rtx(mem, off, r, m.bits, m.pitch, m_TEXA);
-
 				m_texture->Unmap();
-			}
-			else
-			{
-				rtx(mem, off, r, buff, pitch, m_TEXA);
-
-				m_texture->Update(r, buff, pitch, layer);
+				continue;
 			}
 		}
+
+		const GSVector4i rint(r.rintersect(tex_r));
+		if (rint.width() == 0 || rint.height() == 0)
+			continue;
+
+		rtx(mem, off, r, s_unswizzle_buffer, pitch, m_TEXA);
+
+		// need to offset if we're a region texture
+		const u8* src = s_unswizzle_buffer + (pitch * static_cast<u32>(std::max(tex_r.top - r.top, 0))) +
+						(static_cast<u32>(std::max(tex_r.left - r.left, 0)) << (m_palette ? 0 : 2));
+		m_texture->Update(rint - tex_r.xyxy(), src, pitch, layer);
 	}
 
 	if (count < m_write.count)
@@ -2920,7 +2992,7 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
 void GSTextureCache::Source::PreloadLevel(int level)
 {
 	// m_TEX0 is adjusted for mips (messy, should be changed).
-	const HashType hash = HashTexture(m_TEX0, m_TEXA);
+	const HashType hash = HashTexture(m_TEX0, m_TEXA, m_region);
 
 	// Layer is complete again, regardless of whether the hash matches or not (and we reupload).
 	const u8 layer_bit = static_cast<u8>(1) << level;
@@ -2934,7 +3006,7 @@ void GSTextureCache::Source::PreloadLevel(int level)
 	m_layer_hash[level] = hash;
 
 	// And upload the texture.
-	PreloadTexture(m_TEX0, m_TEXA, g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
+	PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
 }
 
 bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
@@ -3674,6 +3746,47 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur
 	return true;
 }
 
+bool GSTextureCache::SourceRegion::IsFixedTEX0(int tw, int th) const
+{
+	return (GetMinX() >= static_cast<u32>(tw) || GetMinY() >= static_cast<u32>(th)); // true when the region starts at or beyond tw or th
+}
+
+GSVector4i GSTextureCache::SourceRegion::GetRect(int tw, int th) const
+{
+	return GSVector4i(HasX() ? GetMinX() : 0, HasY() ? GetMinY() : 0, HasX() ? GetMaxX() : tw, HasY() ? GetMaxY() : th); // per axis: region min/max if set, else 0..tw / 0..th
+}
+
+GSVector4i GSTextureCache::SourceRegion::GetOffset(int tw, int th) const
+{
+	const int xoffs = (GetMaxX() > static_cast<u32>(tw)) ? static_cast<int>(GetMinX()) : 0; // offset only when the region reaches past tw
+	const int yoffs = (GetMaxY() > static_cast<u32>(th)) ? static_cast<int>(GetMinY()) : 0; // offset only when the region reaches past th
+	return GSVector4i(xoffs, yoffs, xoffs, yoffs); // same offset in both corners, so it can be added to a rectangle
+}
+
+GSTextureCache::SourceRegion GSTextureCache::SourceRegion::AdjustForMipmap(u32 level) const
+{
+	SourceRegion ret = {}; // axes without a region stay unset in the result
+	if (HasX())
+	{
+		const u32 new_minx = GetMinX() >> level; // minimum is shifted down (rounds towards zero)
+		const u32 new_maxx = ((GetMaxX() - 1) >> level) + 1; // shift the last texel (max - 1), then add 1 back
+		ret.SetX(new_minx, new_maxx);
+	}
+	if (HasY())
+	{
+		const u32 new_miny = GetMinY() >> level; // same scheme as the X axis
+		const u32 new_maxy = ((GetMaxY() - 1) >> level) + 1;
+		ret.SetY(new_miny, new_maxy);
+	}
+	return ret;
+}
+
+void GSTextureCache::SourceRegion::AdjustTEX0(GIFRegTEX0* TEX0) const
+{
+	const GSOffset offset(GSLocalMemory::m_psm[TEX0->PSM].info, TEX0->TBP0, TEX0->TBW, TEX0->PSM); // built from the current base pointer; only TBP0 is modified below
+	TEX0->TBP0 += offset.bn(GetMinX(), GetMinY()); // NOTE(review): LookupSource compares bn() >> 5 directly against TBP0 >> 5; if bn() already includes the base, "+=" counts it twice - confirm
+}
+
 using BlockHashState = XXH3_state_t;
 
 __fi static void BlockHashReset(BlockHashState& st)
@@ -3696,16 +3809,16 @@ __fi static GSTextureCache::HashType FinishBlockHash(BlockHashState& st)
 	return GSXXH3_64bits_digest(&st);
 }
 
-static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, BlockHashState& hash_st, u8* temp)
+static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, BlockHashState& hash_st, u8* temp)
 {
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
 	const GSVector2i& bs = psm.bs;
-	const int tw = 1 << TEX0.TW;
-	const int th = 1 << TEX0.TH;
+	const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
+	const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
 
 	// From GSLocalMemory foreachBlock(), used for reading textures.
 	// We want to hash the exact same blocks here.
-	const GSVector4i rect(0, 0, tw, th);
+	const GSVector4i rect(region.GetRect(tw, th));
 	const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
 	GSLocalMemory& mem = g_gs_renderer->m_mem;
 	const GSOffset off = mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
@@ -3717,7 +3830,7 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
 	// the texture data with other textures/framebuffers/etc (which is common).
 	// Even though you might think this would be slower than just hashing for the hash
 	// cache, it actually ends up faster (unswizzling is faster than hashing).
-	if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu)
+	if (tw < bs.x || th < bs.y || psm.fmsk != 0xFFFFFFFFu || region.GetMaxX() > 0 || region.GetMinY() > 0)
 	{
 		// Expand texture indices. Align to 32 bytes for AVX2.
 		const u32 pitch = Common::AlignUpPow2(static_cast<u32>(block_rect.z), 32);
@@ -3728,7 +3841,8 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
 		rtx(mem, off, block_rect, temp, pitch, TEXA);
 
 		// Hash the expanded texture.
-		u8* ptr = temp;
+		u8* ptr = temp + (pitch * static_cast<u32>(rect.top - block_rect.top)) +
+				  static_cast<u32>(rect.left - block_rect.left);
 		if (pitch == row_size)
 		{
 			BlockHashAccumulate(hash_st, ptr, pitch * static_cast<u32>(th));
@@ -3741,8 +3855,6 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
 	}
 	else
 	{
-		BlockHashReset(hash_st);
-
 		GSOffset::BNHelper bn = off.bnMulti(block_rect.left, block_rect.top);
 		const int right = block_rect.right >> off.blockShiftX();
 		const int bottom = block_rect.bottom >> off.blockShiftY();
@@ -3758,27 +3870,27 @@ static void HashTextureLevel(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Blo
 	}
 }
 
-GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
+GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region)
 {
 	BlockHashState hash_st;
 	BlockHashReset(hash_st);
-	HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer);
+	HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer);
 	return FinishBlockHash(hash_st);
 }
 
-void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
+void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
 {
 	// m_TEX0 is adjusted for mips (messy, should be changed).
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
 	const GSVector2i& bs = psm.bs;
-	const int tw = 1 << TEX0.TW;
-	const int th = 1 << TEX0.TH;
+	const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW);
+	const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH);
 
 	// Expand texture/apply palette.
-	const GSVector4i rect(0, 0, tw, th);
+	const GSVector4i rect(region.GetRect(tw, th));
 	const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
 	const GSOffset off(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM));
-	const int read_width = std::max(tw, psm.bs.x);
+	const int read_width = block_rect.width();
 	u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
 	GSLocalMemory::readTexture rtx = psm.rtx;
 	if (paltex)
@@ -3788,8 +3900,9 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
 	}
 
 	// If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer.
+	const GSVector4i unoffset_rect(0, 0, tw, th);
 	GSTexture::GSMap map;
-	if (rect.eq(block_rect) && tex->Map(map, &rect, level))
+	if (rect.eq(block_rect) && tex->Map(map, &unoffset_rect, level))
 	{
 		rtx(mem, off, block_rect, map.bits, map.pitch, TEXA);
 		tex->Unmap();
@@ -3801,7 +3914,10 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
 
 		u8* buff = s_unswizzle_buffer;
 		rtx(mem, off, block_rect, buff, pitch, TEXA);
-		tex->Update(rect, buff, pitch, level);
+
+		const u8* ptr = buff + (pitch * static_cast<u32>(rect.top - block_rect.top)) +
+						(static_cast<u32>(rect.left - block_rect.left) << (paltex ? 0 : 2));
+		tex->Update(unoffset_rect, ptr, pitch, level);
 	}
 }
 
@@ -3813,7 +3929,7 @@ GSTextureCache::HashCacheKey::HashCacheKey()
 	TEXA.U64 = 0;
 }
 
-GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod)
+GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region)
 {
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
 
@@ -3821,12 +3937,13 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE
 	ret.TEX0.U64 = TEX0.U64 & 0x00000007FFF00000ULL;
 	ret.TEXA.U64 = (psm.pal == 0 && psm.fmt > 0) ? (TEXA.U64 & 0x000000FF000080FFULL) : 0;
 	ret.CLUTHash = clut ? GSTextureCache::PaletteKeyHash{}({clut, psm.pal}) : 0;
+	ret.region = region;
 
 	BlockHashState hash_st;
 	BlockHashReset(hash_st);
 
 	// base level is always hashed
-	HashTextureLevel(TEX0, TEXA, hash_st, s_unswizzle_buffer);
+	HashTextureLevel(TEX0, TEXA, region, hash_st, s_unswizzle_buffer);
 
 	if (lod)
 	{
@@ -3836,7 +3953,7 @@ GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTE
 		for (int i = 1; i < nmips; i++)
 		{
 			const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + i)};
-			HashTextureLevel(MIP_TEX0, TEXA, hash_st, s_unswizzle_buffer);
+			HashTextureLevel(MIP_TEX0, TEXA, region.AdjustForMipmap(i), hash_st, s_unswizzle_buffer);
 		}
 	}
 
@@ -3860,6 +3977,6 @@ void GSTextureCache::HashCacheKey::RemoveCLUTHash()
 u64 GSTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& key) const
 {
 	std::size_t h = 0;
-	HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64);
+	HashCombine(h, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64, key.region.bits);
 	return h;
 }
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h
index 61ea6dab88..0bb73ba770 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.h
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h
@@ -44,6 +44,42 @@ public:
 		return valid && overlap;
 	}
 
+	struct SourceRegion
+	{
+		u64 bits; // packed 16-bit fields: [0,16) min X, [16,32) max X, [32,48) min Y, [48,64) max Y
+
+		bool HasX() const { return static_cast<u32>(bits) != 0; } // any bit set in the X half
+		bool HasY() const { return static_cast<u32>(bits >> 32) != 0; } // any bit set in the Y half
+		bool HasEither() const { return (bits != 0); }
+
+		void SetX(u32 min, u32 max) { bits |= (min | (max << 16)); } // ORs into bits; does not clear a previously set range
+		void SetY(u32 min, u32 max) { bits |= ((static_cast<u64>(min) << 32) | (static_cast<u64>(max) << 48)); } // ORs into bits; does not clear a previously set range
+
+		u32 GetMinX() const { return static_cast<u32>(bits) & 0xFFFFu; }
+		u32 GetMaxX() const { return static_cast<u32>(bits >> 16) & 0xFFFFu; }
+		u32 GetMinY() const { return static_cast<u32>(bits >> 32) & 0xFFFFu; }
+		u32 GetMaxY() const { return static_cast<u32>(bits >> 48); }
+
+		u32 GetWidth() const { return (GetMaxX() - GetMinX()); } // max is treated as exclusive
+		u32 GetHeight() const { return (GetMaxY() - GetMinY()); } // max is treated as exclusive
+
+		/// Returns true if the area of the region exceeds the TW/TH size (i.e. "fixed tex0").
+		bool IsFixedTEX0(int tw, int th) const;
+
+		/// Returns the rectangle relative to the texture base pointer that the region occupies.
+		GSVector4i GetRect(int tw, int th) const;
+
+		/// When TW/TH is less than the extents covered by the region ("fixed tex0"), returns the offset
+		/// which should be applied to any coordinates to relocate them to the actual region.
+		GSVector4i GetOffset(int tw, int th) const;
+
+		/// Reduces the range of texels relative to the specified mipmap level.
+		SourceRegion AdjustForMipmap(u32 level) const;
+
+		/// Adds the block number of the region's minimum corner to the texture base pointer; the block width is left unchanged.
+		void AdjustTEX0(GIFRegTEX0* TEX0) const;
+	};
+
 	using HashType = u64;
 
 	struct HashCacheKey
@@ -51,10 +87,11 @@ public:
 		HashType TEX0Hash, CLUTHash;
 		GIFRegTEX0 TEX0;
 		GIFRegTEXA TEXA;
+		SourceRegion region;
 
 		HashCacheKey();
 
-		static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod);
+		static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const u32* clut, const GSVector2i* lod, SourceRegion region);
 
 		HashCacheKey WithRemovedCLUTHash() const;
 		void RemoveCLUTHash();
@@ -148,7 +185,7 @@ public:
 		{
 			GSVector4i* rect;
 			u32 count;
-		} m_write;
+		} m_write = {};
 
 		void PreloadLevel(int level);
 
@@ -161,6 +198,7 @@ public:
 		GSTexture* m_palette;
 		GSVector4i m_valid_rect;
 		GSVector2i m_lod;
+		SourceRegion m_region = {};
 		u8 m_valid_hashes = 0;
 		u8 m_complete_layers = 0;
 		bool m_target;
@@ -178,11 +216,13 @@ public:
 		GSOffset::PageLooper m_pages;
 
 	public:
-		Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container = false);
+		Source(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
 		virtual ~Source();
 
 		__fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); }
 
+		void SetPages();
+
 		void Update(const GSVector4i& rect, int layer = 0);
 		void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0);
 
@@ -322,7 +362,7 @@ protected:
 	std::unique_ptr<GSDownloadTexture> m_uint16_download_texture;
 	std::unique_ptr<GSDownloadTexture> m_uint32_download_texture;
 
-	Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut);
+	Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region);
 	Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
 
 	/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
@@ -332,10 +372,10 @@ protected:
 	/// Resizes the download texture if needed.
 	bool PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex);
 
-	HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod);
+	HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod, SourceRegion region);
 
-	static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
-	static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
+	static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
+	static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region);
 
 	// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
 	// TODO: virtual void Write(Target* t, const GSVector4i& r) = 0;
@@ -358,8 +398,8 @@ public:
 
 	GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
 
-	Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod);
-	Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
+	Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, const GSVector2i* lod);
+	Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GIFRegCLAMP& CLAMP, const GSVector4i& r, bool palette = false);
 
 	Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData);
 	Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp
index 2cee44524f..30abd4030f 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp
+++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp
@@ -42,6 +42,8 @@
 // this is a #define instead of a variable to avoid warnings from non-literal format strings
 #define TEXTURE_FILENAME_FORMAT_STRING "%" PRIx64 "-%08x"
 #define TEXTURE_FILENAME_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-%08x"
+#define TEXTURE_FILENAME_REGION_FORMAT_STRING "%" PRIx64 "-r%" PRIx64 "-" "-%08x"
+#define TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%" PRIx64 "-%" PRIx64 "-r%" PRIx64 "-%08x"
 #define TEXTURE_REPLACEMENT_SUBDIRECTORY_NAME "replacements"
 #define TEXTURE_DUMP_SUBDIRECTORY_NAME "dumps"
 
@@ -51,6 +53,7 @@ namespace
 	{
 		u64 TEX0Hash;
 		u64 CLUTHash;
+		GSTextureCache::SourceRegion region;
 
 		union
 		{
@@ -68,9 +71,10 @@ namespace
 		};
 		u32 miplevel;
 
-		__fi u32 Width() const { return (1u << TEX0_TW); }
-		__fi u32 Height() const { return (1u << TEX0_TH); }
+		__fi u32 Width() const { return (region.HasX() ? region.GetWidth() : (1u << TEX0_TW)); }
+		__fi u32 Height() const { return (region.HasY() ? region.GetHeight() : (1u << TEX0_TH)); } // region height when a Y region is set, otherwise 2^TH
 		__fi bool HasPalette() const { return (GSLocalMemory::m_psm[TEX0_PSM].pal > 0); }
+		__fi bool HasRegion() const { return region.HasEither(); }
 
 		__fi GSVector2 ReplacementScale(const GSTextureReplacements::ReplacementTexture& rtex) const
 		{
@@ -79,14 +83,27 @@ namespace
 
 		__fi GSVector2 ReplacementScale(u32 rwidth, u32 rheight) const
 		{
-			return GSVector2(static_cast<float>(rwidth) / static_cast<float>(Width()), static_cast<float>(rheight) / static_cast<float>(Height()));
+			return GSVector2(static_cast<float>(rwidth) / static_cast<float>(Width()),
+				static_cast<float>(rheight) / static_cast<float>(Height()));
 		}
 
-		__fi bool operator==(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) == std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
-		__fi bool operator!=(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) != std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
-		__fi bool operator<(const TextureName& rhs) const { return std::tie(TEX0Hash, CLUTHash, bits) < std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.bits); }
+		__fi bool operator==(const TextureName& rhs) const // compares hashes, region and packed bits; miplevel is not compared
+		{
+			return std::tie(TEX0Hash, CLUTHash, region.bits, bits) ==
+				   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
+		}
+		__fi bool operator!=(const TextureName& rhs) const // negation of operator== over the same fields
+		{
+			return std::tie(TEX0Hash, CLUTHash, region.bits, bits) !=
+				   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
+		}
+		__fi bool operator<(const TextureName& rhs) const // lexicographic order over the same fields as operator==
+		{
+			return std::tie(TEX0Hash, CLUTHash, region.bits, bits) <
+				   std::tie(rhs.TEX0Hash, rhs.CLUTHash, rhs.region.bits, rhs.bits);
+		}
 	};
-	static_assert(sizeof(TextureName) == 24, "ReplacementTextureName is expected size");
+	static_assert(sizeof(TextureName) == 32, "ReplacementTextureName is expected size");
 } // namespace
 
 namespace std
@@ -97,7 +114,7 @@ namespace std
 		std::size_t operator()(const TextureName& val) const
 		{
 			std::size_t h = 0;
-			HashCombine(h, val.TEX0Hash, val.CLUTHash, val.bits, val.miplevel);
+			HashCombine(h, val.TEX0Hash, val.CLUTHash, val.region.bits, val.bits, val.miplevel);
 			return h;
 		}
 	};
@@ -169,6 +186,7 @@ TextureName GSTextureReplacements::CreateTextureName(const GSTextureCache::HashC
 	name.TEX0Hash = hash.TEX0Hash;
 	name.CLUTHash = name.HasPalette() ? hash.CLUTHash : 0;
 	name.miplevel = miplevel;
+	name.region = hash.region;
 	return name;
 }
 
@@ -184,6 +202,7 @@ GSTextureCache::HashCacheKey GSTextureReplacements::HashCacheKeyFromTextureName(
 	key.TEXA.TA1 = tn.TEXA_TA1;
 	key.TEX0Hash = tn.TEX0Hash;
 	key.CLUTHash = tn.HasPalette() ? tn.CLUTHash : 0;
+	key.region = tn.region;
 	return key;
 }
 
@@ -192,15 +211,38 @@ std::optional<TextureName> GSTextureReplacements::ParseReplacementName(const std
 	TextureName ret;
 	ret.miplevel = 0;
 
-	// TODO(Stenzek): Make this better.
 	char extension_dot;
-	if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits, &extension_dot) != 4 || extension_dot != '.')
+	if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash,
+			&ret.region.bits, &ret.bits, &extension_dot) == 5 &&
+		extension_dot == '.')
 	{
-		if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.bits, &extension_dot) != 3 || extension_dot != '.')
-			return std::nullopt;
+		return ret;
 	}
 
-	return ret;
+	if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_REGION_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.region.bits,
+			&ret.bits, &extension_dot) == 4 && extension_dot == '.')
+	{
+		ret.CLUTHash = 0; // name has no CLUT hash; a failed CLUT-format attempt above may have stored a partial value
+		return ret;
+	}
+
+	ret.region.bits = 0; // remaining formats carry no region; discard anything the attempts above stored
+
+	if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_CLUT_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.CLUTHash, &ret.bits,
+			&extension_dot) == 4 &&
+		extension_dot == '.')
+	{
+		return ret;
+	}
+
+	if (std::sscanf(filename.c_str(), TEXTURE_FILENAME_FORMAT_STRING "%c", &ret.TEX0Hash, &ret.bits, &extension_dot) == 3 &&
+		extension_dot == '.')
+	{
+		ret.CLUTHash = 0; // name has no CLUT hash; a failed CLUT-format attempt above may have stored a partial value
+		return ret;
+	}
+
+	return std::nullopt; // filename matches none of the four known layouts
 }
 
 std::string GSTextureReplacements::GetGameTextureDirectory()
@@ -229,23 +271,45 @@ std::string GSTextureReplacements::GetDumpFilename(const TextureName& name, u32
 
 	const std::string game_subdir(Path::Combine(game_dir, TEXTURE_DUMP_SUBDIRECTORY_NAME));
 
-	if (name.HasPalette())
+	std::string filename;
+	if (name.HasRegion())
 	{
-		const std::string filename(
-			(level > 0) ?
-                StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.CLUTHash, name.bits, level) :
-                StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png", name.TEX0Hash, name.CLUTHash, name.bits));
-		ret = Path::Combine(game_subdir, filename);
+		if (name.HasPalette())
+		{
+			filename = (level > 0) ?
+						   StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING "-mip%u.png",
+							   name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits, level) :
+						   StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_CLUT_FORMAT_STRING ".png",
+							   name.TEX0Hash, name.CLUTHash, name.region.bits, name.bits);
+		}
+		else // region but no palette: keep the region in the name, same field order ParseReplacementName() reads
+		{
+			filename = (level > 0) ? StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_FORMAT_STRING "-mip%u.png",
+										 name.TEX0Hash, name.region.bits, name.bits, level) :
+									 StringUtil::StdStringFromFormat(TEXTURE_FILENAME_REGION_FORMAT_STRING ".png",
+										 name.TEX0Hash, name.region.bits, name.bits);
+		}
 	}
 	else
 	{
-		const std::string filename(
-			(level > 0) ?
-                StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) :
-                StringUtil::StdStringFromFormat(TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits));
-		ret = Path::Combine(game_subdir, filename);
+		if (name.HasPalette())
+		{
+			filename = (level > 0) ? StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING "-mip%u.png",
+										 name.TEX0Hash, name.CLUTHash, name.bits, level) :
+									 StringUtil::StdStringFromFormat(TEXTURE_FILENAME_CLUT_FORMAT_STRING ".png",
+										 name.TEX0Hash, name.CLUTHash, name.bits);
+		}
+		else
+		{
+			filename = (level > 0) ? StringUtil::StdStringFromFormat(
+										 TEXTURE_FILENAME_FORMAT_STRING "-mip%u.png", name.TEX0Hash, name.bits, level) :
+									 StringUtil::StdStringFromFormat(
+										 TEXTURE_FILENAME_FORMAT_STRING ".png", name.TEX0Hash, name.bits);
+		}
 	}
 
+	ret = Path::Combine(game_subdir, filename);
+
 	return ret;
 }
 
@@ -569,7 +633,8 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures()
 	s_async_loaded_textures.clear();
 }
 
-void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level)
+void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0,
+	const GIFRegTEXA& TEXA, GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level)
 {
 	// check if it's been dumped or replaced already
 	const TextureName name(CreateTextureName(hash, level));
@@ -589,12 +654,12 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash
 	// compute width/height
 	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
 	const GSVector2i& bs = psm.bs;
-	const int tw = 1 << TEX0.TW;
-	const int th = 1 << TEX0.TH;
-	const GSVector4i rect(0, 0, tw, th);
+	const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); // region width when the region constrains X, else 1 << TEX0.TW
+	const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); // region height when the region constrains Y, else 1 << TEX0.TH
+	const GSVector4i rect(region.GetRect(tw, th)); // rectangle to dump, as reported by the region for this size
 	const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
-	const int read_width = std::max(tw, psm.bs.x);
-	const int read_height = std::max(th, psm.bs.y);
+	const int read_width = block_rect.width(); // read size follows block_rect (rect after ralign<Align_Outside>(bs)), not tw/th
+	const int read_height = block_rect.height();
 	const u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
 
 	// use per-texture buffer so we can compress the texture asynchronously and not block the GS thread
@@ -603,8 +668,9 @@ void GSTextureReplacements::DumpTexture(const GSTextureCache::HashCacheKey& hash
 	psm.rtx(mem, mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM), block_rect, buffer.GetPtr(), pitch, TEXA);
 
 	// okay, now we can actually dump it
-	QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer)]() {
-		if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr(), pitch))
+	const u32 buffer_offset = ((rect.top - block_rect.top) * pitch) + ((rect.left - block_rect.left) * sizeof(u32)); // distance from block_rect's origin to rect's origin: one pitch per row plus sizeof(u32) per column
+	QueueWorkerThreadItem([filename = std::move(filename), tw, th, pitch, buffer = std::move(buffer), buffer_offset]() {
+		if (!SavePNGImage(filename.c_str(), tw, th, buffer.GetPtr() + buffer_offset, pitch)) // save tw x th starting at rect's origin; NOTE(review): assumes GetPtr() is byte-addressed -- confirm
 			Console.Error("Failed to dump texture to '%s'.", filename.c_str());
 	});
 }
diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h
index cc61be7702..39a8e0c8e7 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h
+++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h
@@ -52,7 +52,8 @@ namespace GSTextureReplacements
 	GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, const GSVector2& scale, bool mipmap);
 	void ProcessAsyncLoadedTextures();
 
-	void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, u32 level);
+	void DumpTexture(const GSTextureCache::HashCacheKey& hash, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA,
+		GSTextureCache::SourceRegion region, GSLocalMemory& mem, u32 level);
 	void ClearDumpedTextureList();
 
 	/// Loader will take a filename and interpret the format (e.g. DDS, PNG, etc).
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
index 85f011b148..a2987d1b72 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@@ -1374,6 +1374,8 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
 		setFnConstantB(m_fn_constants, pssel.tcc,                GSMTLConstantIndex_PS_TCC);
 		setFnConstantI(m_fn_constants, pssel.wms,                GSMTLConstantIndex_PS_WMS);
 		setFnConstantI(m_fn_constants, pssel.wmt,                GSMTLConstantIndex_PS_WMT);
+		setFnConstantB(m_fn_constants, pssel.adjs,               GSMTLConstantIndex_PS_ADJS);
+		setFnConstantB(m_fn_constants, pssel.adjt,               GSMTLConstantIndex_PS_ADJT);
 		setFnConstantB(m_fn_constants, pssel.ltf,                GSMTLConstantIndex_PS_LTF);
 		setFnConstantB(m_fn_constants, pssel.shuffle,            GSMTLConstantIndex_PS_SHUFFLE);
 		setFnConstantB(m_fn_constants, pssel.read_ba,            GSMTLConstantIndex_PS_READ_BA);
@@ -1403,7 +1405,6 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
 		setFnConstantB(m_fn_constants, pssel.automatic_lod,      GSMTLConstantIndex_PS_AUTOMATIC_LOD);
 		setFnConstantB(m_fn_constants, pssel.manual_lod,         GSMTLConstantIndex_PS_MANUAL_LOD);
 		setFnConstantB(m_fn_constants, pssel.point_sampler,      GSMTLConstantIndex_PS_POINT_SAMPLER);
-		setFnConstantB(m_fn_constants, pssel.invalid_tex0,       GSMTLConstantIndex_PS_INVALID_TEX0);
 		setFnConstantI(m_fn_constants, pssel.scanmsk,            GSMTLConstantIndex_PS_SCANMSK);
 		auto newps = LoadShader(@"ps_main");
 		ps = newps;
@@ -1594,10 +1595,10 @@ static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, WH)               == of
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.x) == offsetof(GSMTLMainPSUniform, ta));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.z) == offsetof(GSMTLMainPSUniform, max_depth));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.w) == offsetof(GSMTLMainPSUniform, alpha_fix));
-static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MskFix)           == offsetof(GSMTLMainPSUniform, uv_msk_fix));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FbMask)           == offsetof(GSMTLMainPSUniform, fbmask));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, HalfTexel)        == offsetof(GSMTLMainPSUniform, half_texel));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MinMax)           == offsetof(GSMTLMainPSUniform, uv_min_max));
+static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STRange)          == offsetof(GSMTLMainPSUniform, st_range));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle)   == offsetof(GSMTLMainPSUniform, channel_shuffle));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack)     == offsetof(GSMTLMainPSUniform, tc_offset));
 static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale)          == offsetof(GSMTLMainPSUniform, st_scale));
diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
index 6d22d1b2d1..d8c2265c37 100644
--- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
+++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
@@ -108,11 +108,15 @@ struct GSMTLMainPSUniform
 	vector_float2 ta;
 	float max_depth;
 	float alpha_fix;
-	vector_uint4 uv_msk_fix;
 	vector_uint4 fbmask;
 
 	vector_float4 half_texel;
-	vector_float4 uv_min_max;
+	union // uv_min_max and uv_msk_fix occupy the same storage; NOTE(review): presumably only one is meaningful per draw -- confirm
+	{
+		vector_float4 uv_min_max;
+		vector_uint4 uv_msk_fix;
+	};
+	vector_float4 st_range; // tfx.metal subtracts .xy from uv and multiplies by .zw when PS_ADJS / PS_ADJT are set
 	struct
 	{
 		unsigned int blue_mask;
@@ -166,6 +170,8 @@ enum GSMTLFnConstants
 	GSMTLConstantIndex_PS_TCC,
 	GSMTLConstantIndex_PS_WMS,
 	GSMTLConstantIndex_PS_WMT,
+	GSMTLConstantIndex_PS_ADJS,
+	GSMTLConstantIndex_PS_ADJT,
 	GSMTLConstantIndex_PS_LTF,
 	GSMTLConstantIndex_PS_SHUFFLE,
 	GSMTLConstantIndex_PS_READ_BA,
@@ -194,6 +200,5 @@ enum GSMTLFnConstants
 	GSMTLConstantIndex_PS_AUTOMATIC_LOD,
 	GSMTLConstantIndex_PS_MANUAL_LOD,
 	GSMTLConstantIndex_PS_POINT_SAMPLER,
-	GSMTLConstantIndex_PS_INVALID_TEX0,
 	GSMTLConstantIndex_PS_SCANMSK,
 };
diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal
index 1daa381f84..4a46cceaa8 100644
--- a/pcsx2/GS/Renderers/Metal/tfx.metal
+++ b/pcsx2/GS/Renderers/Metal/tfx.metal
@@ -37,6 +37,8 @@ constant uint PS_TFX                [[function_constant(GSMTLConstantIndex_PS_TF
 constant bool PS_TCC                [[function_constant(GSMTLConstantIndex_PS_TCC)]];
 constant uint PS_WMS                [[function_constant(GSMTLConstantIndex_PS_WMS)]];
 constant uint PS_WMT                [[function_constant(GSMTLConstantIndex_PS_WMT)]];
+constant bool PS_ADJS               [[function_constant(GSMTLConstantIndex_PS_ADJS)]];
+constant bool PS_ADJT               [[function_constant(GSMTLConstantIndex_PS_ADJT)]];
 constant bool PS_LTF                [[function_constant(GSMTLConstantIndex_PS_LTF)]];
 constant bool PS_SHUFFLE            [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
 constant bool PS_READ_BA            [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
@@ -65,7 +67,6 @@ constant bool PS_TEX_IS_FB          [[function_constant(GSMTLConstantIndex_PS_TE
 constant bool PS_AUTOMATIC_LOD      [[function_constant(GSMTLConstantIndex_PS_AUTOMATIC_LOD)]];
 constant bool PS_MANUAL_LOD         [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]];
 constant bool PS_POINT_SAMPLER      [[function_constant(GSMTLConstantIndex_PS_POINT_SAMPLER)]];
-constant bool PS_INVALID_TEX0       [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]];
 constant uint PS_SCANMSK            [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]];
 
 constant GSMTLExpandType VS_EXPAND_TYPE = static_cast<GSMTLExpandType>(VS_EXPAND_TYPE_RAW);
@@ -321,7 +322,21 @@ struct PSMain
 			// As of 2018 this issue is still present.
 			uv = (trunc(uv * cb.wh.zw) + 0.5) / cb.wh.zw;
 		}
-		uv *= cb.st_scale;
+		if (!PS_ADJS && !PS_ADJT) // neither axis adjusted: scale both components by st_scale
+		{
+			uv *= cb.st_scale;
+		}
+		else // at least one axis adjusted: handle S and T independently
+		{
+			if (PS_ADJS)
+				uv.x = (uv.x - cb.st_range.x) * cb.st_range.z; // S: subtract st_range.x, then multiply by st_range.z
+			else
+				uv.x = uv.x * cb.st_scale.x;
+			if (PS_ADJT)
+				uv.y = (uv.y - cb.st_range.y) * cb.st_range.w; // T: subtract st_range.y, then multiply by st_range.w
+			else
+				uv.y = uv.y * cb.st_scale.y;
+		}
 
 		if (PS_AUTOMATIC_LOD)
 		{
@@ -360,7 +375,7 @@ struct PSMain
 	float4 clamp_wrap_uv(float4 uv)
 	{
 		float4 uv_out = uv;
-		float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy;
+		float4 tex_size = cb.wh.xyxy;
 
 		if (PS_WMS == PS_WMT)
 		{
@@ -724,12 +739,7 @@ struct PSMain
 	float4 ps_color()
 	{
 		float2 st, st_int;
-		if (!FST && PS_INVALID_TEX0)
-		{
-			st = (in.t.xy * cb.wh.xy) / (in.t.w * cb.wh.zw);
-			st_int = (in.ti.zw * cb.wh.xy) / (in.t.w * cb.wh.zw);
-		}
-		else if (!FST)
+		if (!FST)
 		{
 			st = in.t.xy / in.t.w;
 			st_int = in.ti.zw / in.t.w;
diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
index 7fb4074408..7523c27170 100644
--- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
+++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
@@ -1029,6 +1029,8 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
 	std::string macro = fmt::format("#define PS_FST {}\n", sel.fst)
 		+ fmt::format("#define PS_WMS {}\n", sel.wms)
 		+ fmt::format("#define PS_WMT {}\n", sel.wmt)
+		+ fmt::format("#define PS_ADJS {}\n", sel.adjs)
+		+ fmt::format("#define PS_ADJT {}\n", sel.adjt)
 		+ fmt::format("#define PS_AEM_FMT {}\n", sel.aem_fmt)
 		+ fmt::format("#define PS_PAL_FMT {}\n", sel.pal_fmt)
 		+ fmt::format("#define PS_DFMT {}\n", sel.dfmt)
@@ -1037,7 +1039,6 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
 		+ fmt::format("#define PS_URBAN_CHAOS_HLE {}\n", sel.urban_chaos_hle)
 		+ fmt::format("#define PS_TALES_OF_ABYSS_HLE {}\n", sel.tales_of_abyss_hle)
 		+ fmt::format("#define PS_TEX_IS_FB {}\n", sel.tex_is_fb)
-		+ fmt::format("#define PS_INVALID_TEX0 {}\n", sel.invalid_tex0)
 		+ fmt::format("#define PS_AEM {}\n", sel.aem)
 		+ fmt::format("#define PS_TFX {}\n", sel.tfx)
 		+ fmt::format("#define PS_TCC {}\n", sel.tcc)
diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
index 1a1ee0155d..0e4bf4f6a5 100644
--- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
+++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
@@ -1948,6 +1948,8 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
 	AddMacro(ss, "PS_FST", sel.fst);
 	AddMacro(ss, "PS_WMS", sel.wms);
 	AddMacro(ss, "PS_WMT", sel.wmt);
+	AddMacro(ss, "PS_ADJS", sel.adjs);
+	AddMacro(ss, "PS_ADJT", sel.adjt);
 	AddMacro(ss, "PS_AEM_FMT", sel.aem_fmt);
 	AddMacro(ss, "PS_PAL_FMT", sel.pal_fmt);
 	AddMacro(ss, "PS_DFMT", sel.dfmt);
@@ -1955,7 +1957,6 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
 	AddMacro(ss, "PS_CHANNEL_FETCH", sel.channel);
 	AddMacro(ss, "PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);
 	AddMacro(ss, "PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
-	AddMacro(ss, "PS_INVALID_TEX0", sel.invalid_tex0);
 	AddMacro(ss, "PS_AEM", sel.aem);
 	AddMacro(ss, "PS_TFX", sel.tfx);
 	AddMacro(ss, "PS_TCC", sel.tcc);
diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h
index 602ed0d8bf..af2c5fe8dd 100644
--- a/pcsx2/ShaderCacheVersion.h
+++ b/pcsx2/ShaderCacheVersion.h
@@ -15,4 +15,4 @@
 
 /// Version number for GS and other shaders. Increment whenever any of the contents of the
 /// shaders change, to invalidate the cache.
-static constexpr u32 SHADER_CACHE_VERSION = 11;
+static constexpr u32 SHADER_CACHE_VERSION = 12;