From 8f19912c641435cb23c83ecb33a066f8be59294d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Thu, 12 Dec 2024 20:19:27 +1000 Subject: [PATCH] GPU/ShaderGen: Use sample instead of load at 1x as well Consistency. Mali ends up ever-so-slightly faster with sample versus texel loads, apparently. Also fixes compile errors when using texture filtering on GLSL ES. --- src/core/gpu_hw_shadergen.cpp | 4 ++-- src/core/shader_cache_version.h | 2 +- src/util/shadergen.cpp | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index e13cf4dbd..b3fd5fdd7 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -883,7 +883,7 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) #if !UPSCALED uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); uint2 vicoord = (texpage.xy + icoord) & uint2(1023, 511); - return LOAD_TEXTURE(samp0, int2(vicoord), 0); + return SAMPLE_TEXTURE_LEVEL(samp0, float2(vicoord) * RCP_VRAM_SIZE, 0.0); #else // Coordinates are already upscaled, we need to downscale them to apply the texture // window, then re-upscale/offset. We can't round here, because it could result in @@ -979,7 +979,7 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) ialpha = 1.0; #elif TEXTURE_FILTERING #if PAGE_TEXTURE - FilteredSampleFromVRAM(int2(0, 0), v_tex0, v_uv_limits, texcol, ialpha); + FilteredSampleFromVRAM(VECTOR_BROADCAST(TEXPAGE_VALUE, 0u), v_tex0, v_uv_limits, texcol, ialpha); #else FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha); #endif diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index f48ee7f87..dd969ce79 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -5,4 +5,4 @@ #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 23; +static constexpr u32 SHADER_CACHE_VERSION = 24; diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 98907cbb1..c77826c0d 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -298,6 +298,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */ ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n"; ss << "#define BEGIN_ARRAY(type, size) type[size](\n"; ss << "#define END_ARRAY )\n"; + ss << "#define VECTOR_BROADCAST(type, value) (type(value))\n"; ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n"; ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n"; @@ -344,6 +345,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */ ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n"; ss << "#define BEGIN_ARRAY(type, size) {\n"; ss << "#define END_ARRAY }\n"; + ss << "#define VECTOR_BROADCAST(type, value) ((type)(value))\n"; } ss << "\n";