GS:HW: Don't scale rt when converting to texture

Scale texture coordinates instead
2022-01-16 01:05:05 -06:00 · 2022-01-16 01:05:05 -06:00 · 6d0b9b3747
parent 92f2cef4d1
commit 6d0b9b3747
8 changed files with 39 additions and 105 deletions
--- a/bin/resources/shaders/dx11/tfx.fx
+++ b/bin/resources/shaders/dx11/tfx.fx
@ -137,7 +137,7 @@ cbuffer cb1
 	float4 MinMax;
 	int4 ChannelShuffle;
 	float2 TC_OffsetHack;
-	float2 pad_cb1;
+	float2 STScale;
 	float4x4 DitherMatrix;
 };
@ -156,6 +156,7 @@ float4 sample_c(float2 uv, float uv_w)
 		// As of 2018 this issue is still present.
 		uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
 	}
 	uv *= STScale;
 #if PS_AUTOMATIC_LOD == 1
 	return Texture.Sample(TextureSampler, uv);
--- a/bin/resources/shaders/opengl/common_header.glsl
+++ b/bin/resources/shaders/opengl/common_header.glsl
@ -82,7 +82,7 @@ layout(std140, binding = 0) uniform cb21
    ivec4 ChannelShuffle;
    vec2 TC_OffsetHack;
-    vec2 pad_cb21;
+    vec2 STScale;
    mat4 DitherMatrix;
 };
--- a/bin/resources/shaders/opengl/tfx_fs.glsl
+++ b/bin/resources/shaders/opengl/tfx_fs.glsl
@ -91,6 +91,7 @@ vec4 sample_c(vec2 uv)
    // As of 2018 this issue is still present.
    uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
 #endif
    uv *= STScale;
 #if PS_AUTOMATIC_LOD == 1
    return texture(TextureSampler, uv);
--- a/bin/resources/shaders/vulkan/tfx.glsl
+++ b/bin/resources/shaders/vulkan/tfx.glsl
@ -363,7 +363,7 @@ layout(std140, set = 0, binding = 1) uniform cb1
 	vec4 MinMax;
 	ivec4 ChannelShuffle;
 	vec2 TC_OffsetHack;
-	vec2 pad_cb1;
+	vec2 STScale;
 	mat4 DitherMatrix;
 };
@ -410,6 +410,7 @@ vec4 sample_c(vec2 uv)
 		// As of 2018 this issue is still present.
 		uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
 #endif
 	uv *= STScale;
 #if PS_AUTOMATIC_LOD == 1
    return texture(Texture, uv);
--- a/pcsx2/GS/Renderers/Common/GSDevice.cpp
+++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp
@ -154,6 +154,7 @@ GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int width, int height, i
 		}
 	}
 	t->SetScale(GSVector2(1, 1)); // Things seem to assume that all textures come out of here with scale 1...
 	t->Commit(); // Clear won't be done if the texture isn't committed.
 	switch (type)
--- a/pcsx2/GS/Renderers/Common/GSDevice.h
+++ b/pcsx2/GS/Renderers/Common/GSDevice.h
@ -417,7 +417,7 @@ struct alignas(16) GSHWDrawConfig
 		GSVector4 MinMax;
 		GSVector4i ChannelShuffle;
 		GSVector2 TCOffsetHack;
-		float pad1[2];
+		GSVector2 STScale;
 		GSVector4 DitherMatrix[4];
--- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp
+++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp
@ -991,13 +991,18 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
 	m_conf.ps.ltf = bilinear && shader_emulated_sampler;
 	m_conf.ps.point_sampler = g_gs_device->Features().broken_point_sampler && (!bilinear || shader_emulated_sampler);
 	const GSVector2 scale = tex->m_texture->GetScale();
 	const int w = tex->m_texture->GetWidth();
 	const int h = tex->m_texture->GetHeight();
 	const int tw = (int)(1 << m_context->TEX0.TW);
 	const int th = (int)(1 << m_context->TEX0.TH);
 	const int miptw = 1 << tex->m_TEX0.TW;
 	const int mipth = 1 << tex->m_TEX0.TH;
-	const GSVector4 WH(tw, th, w, h);
+	const GSVector4 WH(static_cast<float>(tw), static_cast<float>(th), miptw * scale.x, mipth * scale.y);
 	const GSVector4 st_scale = WH.zwzw() / GSVector4(w, h).xyxy();
 	m_conf.cb_ps.STScale = GSVector2(st_scale.x, st_scale.y);
 	m_conf.ps.fst = !!PRIM->FST;
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
@ -1332,8 +1332,10 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 		// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
-		int w = (int)(dst->m_texture->GetScale().x * tw);
+		GSVector2i dstsize = dst->m_texture->GetSize();
-		int h = (int)(dst->m_texture->GetScale().y * th);
+
 		int w = std::min(dstsize.x, static_cast<int>(dst->m_texture->GetScale().x * tw));
 		int h = std::min(dstsize.y, static_cast<int>(dst->m_texture->GetScale().y * th));
 		if (is_8bits)
 		{
 			// Unscale 8 bits textures, quality won't be nice but format is really awful
@ -1341,8 +1343,6 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 			h = th;
 		}
 		GSVector2i dstsize = dst->m_texture->GetSize();
 		// pitch conversion
 		if (dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM
@ -1405,59 +1405,34 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 		GSVector2 scale = dst->m_texture->GetScale();
-		GSVector4 dRect(0, 0, w, h);
+		GSVector4i sRect(0, 0, w, h);
 		const bool use_texture = shader == ShaderConvert::COPY;
-		// Lengthy explanation of the rescaling code.
+		if (half_right)
 		// Here an example in 2x:
 		// RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling)
 		//
 		// PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels)
 		// So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is
 		// too small.
 		//
 		// So we will only limit the resize to the available data in RT.
 		// Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture
 		// size (which is 1280x1024) (i.e. 800x512)
 		// From the rendering point of view. UV coordinate will be normalized on the real GS texture size
 		// This way it can be used on an upscaled texture without extra scaling factor (only requirement is
 		// to have same proportion)
 		//
 		// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
 		// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
 		// I think it is the purpose of the UserHacks_HalfPixelOffset below. However implementation is less
 		// than ideal.
 		// 1/ It supposes games have a half pixel offset on texture coordinates, which could be wrong
 		// 2/ It doesn't support rescaling of the RT (tw = 1024)
 		// Maybe it would be easier to just round the UV value in the Vertex Shader
 		if (!is_8bits)
 		{
-			// 8 bits handling is special due to unscaling. It is better to not execute this code
+			// You typically hit this code in snow engine game. Dstsize is the size of the Dx/GL RT
-			if (w > dstsize.x)
+			// which is set to some arbitrary number. h/w are based on the input texture
 			// so the only reliable way to find the real size of the target is to use the TBW value.
 			int half_width = static_cast<int>(dst->m_TEX0.TBW * (64 / 2) * dst->m_texture->GetScale().x);
 			if (half_width < dstsize.x)
 			{
-				scale.x = (float)dstsize.x / tw;
+				int copy_width = std::min(half_width, dstsize.x - half_width);
-				dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
+				sRect.x = half_width;
-				w = dstsize.x;
+				sRect.z = half_width + copy_width;
 				w = copy_width;
 			}
-
+			else
 			if (h > dstsize.y)
 			{
-				scale.y = (float)dstsize.y / th;
+				DevCon.Error("Invalid half-right copy with width %d from %dx%d texture", half_width * 2, w, h);
 				dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
 				h = dstsize.y;
 			}
 		}
 		GSVector4 sRect(0, 0, w, h);
 		const bool texture_completely_overwritten = ((sRect == dRect).alltrue());
 		const bool use_texture = (texture_completely_overwritten && shader == ShaderConvert::COPY);
 		// Don't be fooled by the name. 'dst' is the old target (hence the input)
 		// 'src' is the new texture cache entry (hence the output)
 		GSTexture* sTex = dst->m_texture;
 		GSTexture* dTex = use_texture ?
 			g_gs_device->CreateTexture(w, h, false, GSTexture::Format::Color, true) :
-			g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, !texture_completely_overwritten);
+			g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, false);
 		src->m_texture = dTex;
 		// GH: by default (m_paltex == 0) GS converts texture to the 32 bit format
@ -1467,68 +1442,18 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
 		{
 			AttachPaletteToSource(src, psm.pal, true);
 		}
 		// Disable linear filtering for various GS post-processing effect
 		// 1/ Palette is used to interpret the alpha channel of the RT as an index.
 		// Star Ocean 3 uses it to emulate a stencil buffer.
 		// 2/ Z formats are a bad idea to interpolate (discontinuities).
 		// 3/ 16 bits buffer is used to move data from a channel to another.
 		//
 		// I keep linear filtering for standard color even if I'm not sure that it is
 		// working correctly.
 		// Indeed, texture is reduced so you need to read all covered pixels (9 in 3x)
 		// to correctly interpolate the value. Linear interpolation is likely acceptable
 		// only in 2x scaling
 		//
 		// Src texture will still be bilinear interpolated so I'm really not sure
 		// that we need to do it here too.
 		//
 		// Future note: instead to do
 		// RT 2048x2048 -> T 1024x1024 -> RT 2048x2048
 		// We can maybe sample directly a bigger texture
 		// RT 2048x2048 -> T 2048x2048 -> RT 2048x2048
 		// Pro: better quality. Copy instead of StretchRect (must be faster)
 		// Cons: consume more memory
 		//
 		// In distant future: investigate to reuse the RT directly without any
 		// copy. Likely a speed boost and memory usage reduction.
 		bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24);
 		if (use_texture)
 		{
-			if (half_right)
+			g_gs_device->CopyRect(sTex, dTex, sRect);
 			{
 				// You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT
 				// which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture
 				// so the only reliable way to find the real size of the target is to use the TBW value.
 				const float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x;
 				const GSVector4i real_rc((int)(real_width / 2.0f), 0, (int)real_width, h);
 				if (real_rc.width() > w)
 				{
 					DevCon.Error("Dropping invalid half_write copy from {%d,%d} %dx%d to %dx%d",
 						real_rc.x, real_rc.y, real_rc.width(), real_rc.height(), w, h);
 		}
 		else
 		{
-					g_gs_device->CopyRect(sTex, dTex, real_rc);
+			GSVector4 sRectF(sRect);
-				}
+			sRectF.z /= sTex->GetWidth();
-			}
+			sRectF.w /= sTex->GetHeight();
 			else
 			{
 				g_gs_device->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w
 			}
 		}
 		else
 		{
 			// Different size or not the same format
 			sRect.z /= sTex->GetWidth();
 			sRect.w /= sTex->GetHeight();
-			if (half_right)
+			g_gs_device->StretchRect(sTex, sRectF, dTex, GSVector4(0, 0, w, h), shader, false);
 			{
 				sRect.x = sRect.z / 2.0f;
 			}
 			g_gs_device->StretchRect(sTex, sRect, dTex, dRect, shader, linear);
 		}
 		if (src->m_texture)