diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx index 463cb35d83..b58c28bbb9 100644 --- a/bin/resources/shaders/dx11/convert.fx +++ b/bin/resources/shaders/dx11/convert.fx @@ -162,36 +162,98 @@ PS_OUTPUT ps_convert_float16_rgb5a1(PS_INPUT input) return output; } + +float rgba8_to_depth32(float4 val) +{ + uint4 c = uint4(val * 255.5f); + return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); +} + +float rgba8_to_depth24(float4 val) +{ + uint3 c = uint3(val.rgb * 255.5f); + return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); +} + +float rgba8_to_depth16(float4 val) +{ + uint2 c = uint2(val.rg * 255.5f); + return float(c.r | (c.g << 8)) * exp2(-32.0f); +} + +float rgb5a1_to_depth16(float4 val) +{ + uint4 c = uint4(val * 255.5f); + return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); +} + float ps_convert_rgba8_float32(PS_INPUT input) : SV_Depth { - // Convert a RRGBA texture into a float depth texture - uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f); - return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + return rgba8_to_depth32(sample_c(input.t)); } float ps_convert_rgba8_float24(PS_INPUT input) : SV_Depth { // Same as above but without the alpha channel (24 bits Z) - // Convert a RRGBA texture into a float depth texture - uint3 c = uint3(sample_c(input.t).rgb * 255.0f + 0.5f); - return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + return rgba8_to_depth24(sample_c(input.t)); } float ps_convert_rgba8_float16(PS_INPUT input) : SV_Depth { // Same as above but without the A/B channels (16 bits Z) - // Convert a RRGBA texture into a float depth texture - uint2 c = uint2(sample_c(input.t).rg * 255.0f + 0.5f); - return float(c.r | (c.g << 8)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + return rgba8_to_depth16(sample_c(input.t)); } float ps_convert_rgb5a1_float16(PS_INPUT input) : SV_Depth { - // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z - uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f); - return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + return rgb5a1_to_depth16(sample_c(input.t)); +} + +#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \ + uint width, height; \ + Texture.GetDimensions(width, height); \ + float2 top_left_f = input.t * float2(width, height) - 0.5f; \ + int2 top_left = int2(floor(top_left_f)); \ + int4 coords = clamp(int4(top_left, top_left + 1), int4(0, 0, 0, 0), int2(width - 1, height - 1).xyxy); \ + float2 mix_vals = frac(top_left_f); \ + float depthTL = CONVERT_FN(Texture.Load(int3(coords.xy, 0))); \ + float depthTR = CONVERT_FN(Texture.Load(int3(coords.zy, 0))); \ + float depthBL = CONVERT_FN(Texture.Load(int3(coords.xw, 0))); \ + float depthBR = CONVERT_FN(Texture.Load(int3(coords.zw, 0))); \ + return lerp(lerp(depthTL, depthTR, mix_vals.x), lerp(depthBL, depthBR, mix_vals.x), mix_vals.y); + +float ps_convert_rgba8_float32_biln(PS_INPUT input) : SV_Depth +{ + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32); +} + +float ps_convert_rgba8_float24_biln(PS_INPUT input) : SV_Depth +{ + // Same as above but without the alpha channel (24 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24); +} + +float ps_convert_rgba8_float16_biln(PS_INPUT input) : SV_Depth +{ + // Same as above but without the A/B channels (16 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16); +} + +float ps_convert_rgb5a1_float16_biln(PS_INPUT input) : SV_Depth +{ + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16); } PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input) diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl index 257bd24dc3..2fae5d922f 100644 --- a/bin/resources/shaders/opengl/convert.glsl +++ b/bin/resources/shaders/opengl/convert.glsl @@ -97,12 +97,35 @@ void ps_convert_float16_rgb5a1() } #endif +float rgba8_to_depth32(vec4 unorm) +{ + uvec4 c = uvec4(unorm * vec4(255.5f)); + return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); +} + +float rgba8_to_depth24(vec4 unorm) +{ + uvec3 c = uvec3(unorm.rgb * vec3(255.5f)); + return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); +} + +float rgba8_to_depth16(vec4 unorm) +{ + uvec2 c = uvec2(unorm.rg * vec2(255.5f)); + return float(c.r | (c.g << 8)) * exp2(-32.0f); +} + +float rgb5a1_to_depth16(vec4 unorm) +{ + uvec4 c = uvec4(unorm * vec4(255.5f)); + return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); +} + #ifdef ps_convert_rgba8_float32 void ps_convert_rgba8_float32() { - // Convert a RRGBA texture into a float depth texture - uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth32(sample_c()); } #endif @@ -111,9 +134,8 @@ void ps_convert_rgba8_float24() { // Same as above but without the alpha channel (24 bits Z) - // Convert a RRGBA texture into a float depth texture - uvec3 c = uvec3(sample_c().rgb * vec3(255.0f) + vec3(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth24(sample_c()); } #endif @@ -122,18 +144,64 @@ void ps_convert_rgba8_float16() { // Same as above but without the A/B channels (16 bits Z) - // Convert a RRGBA texture into a float depth texture - uvec2 c = uvec2(sample_c().rg * vec2(255.0f) + vec2(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth16(sample_c()); } #endif #ifdef ps_convert_rgb5a1_float16 void ps_convert_rgb5a1_float16() { - // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z - uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f)); - gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + gl_FragDepth = rgb5a1_to_depth16(sample_c()); +} +#endif + +#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \ + ivec2 dims = textureSize(TextureSampler, 0); \ + vec2 top_left_f = PSin_t * vec2(dims) - 0.5f; \ + ivec2 top_left = ivec2(floor(top_left_f)); \ + ivec4 coords = clamp(ivec4(top_left, top_left + 1), ivec4(0), dims.xyxy - 1); \ + vec2 mix_vals = fract(top_left_f); \ + float depthTL = CONVERT_FN(texelFetch(TextureSampler, coords.xy, 0)); \ + float depthTR = CONVERT_FN(texelFetch(TextureSampler, coords.zy, 0)); \ + float depthBL = CONVERT_FN(texelFetch(TextureSampler, coords.xw, 0)); \ + float depthBR = CONVERT_FN(texelFetch(TextureSampler, coords.zw, 0)); \ + gl_FragDepth = mix(mix(depthTL, depthTR, mix_vals.x), mix(depthBL, depthBR, mix_vals.x), mix_vals.y); + +#ifdef ps_convert_rgba8_float32_biln +void ps_convert_rgba8_float32_biln() +{ + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32); +} +#endif + +#ifdef ps_convert_rgba8_float24_biln +void ps_convert_rgba8_float24_biln() +{ + // Same as above but without the alpha channel (24 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24); +} +#endif + +#ifdef ps_convert_rgba8_float16_biln +void ps_convert_rgba8_float16_biln() +{ + // Same as above but without the A/B channels (16 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16); +} +#endif + +#ifdef ps_convert_rgb5a1_float16_biln +void ps_convert_rgb5a1_float16_biln() +{ + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16); } #endif diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl index ab5f824ec2..05b51144bb 100644 --- a/bin/resources/shaders/vulkan/convert.glsl +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -128,12 +128,35 @@ void ps_convert_float16_rgb5a1() } #endif +float rgba8_to_depth32(vec4 unorm) +{ + uvec4 c = uvec4(unorm * vec4(255.5f)); + return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); +} + +float rgba8_to_depth24(vec4 unorm) +{ + uvec3 c = uvec3(unorm.rgb * vec3(255.5f)); + return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); +} + +float rgba8_to_depth16(vec4 unorm) +{ + uvec2 c = uvec2(unorm.rg * vec2(255.5f)); + return float(c.r | (c.g << 8)) * exp2(-32.0f); +} + +float rgb5a1_to_depth16(vec4 unorm) +{ + uvec4 c = uvec4(unorm * vec4(255.5f)); + return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); +} + #ifdef ps_convert_rgba8_float32 void ps_convert_rgba8_float32() { - // Convert a RRGBA texture into a float depth texture - uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth32(sample_c(v_tex)); } #endif @@ -142,9 +165,8 @@ void ps_convert_rgba8_float24() { // Same as above but without the alpha channel (24 bits Z) - // Convert a RRGBA texture into a float depth texture - uvec3 c = uvec3(sample_c(v_tex).rgb * vec3(255.0f) + vec3(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth24(sample_c(v_tex)); } #endif @@ -153,18 +175,64 @@ void ps_convert_rgba8_float16() { // Same as above but without the A/B channels (16 bits Z) - // Convert a RRGBA texture into a float depth texture - uvec2 c = uvec2(sample_c(v_tex).rg * vec2(255.0f) + vec2(0.5f)); - gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f); + // Convert an RGBA texture into a float depth texture + gl_FragDepth = rgba8_to_depth16(sample_c(v_tex)); } #endif #ifdef ps_convert_rgb5a1_float16 void ps_convert_rgb5a1_float16() { - // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z - uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f)); - gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + gl_FragDepth = rgb5a1_to_depth16(sample_c(v_tex)); +} +#endif + +#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \ + ivec2 dims = textureSize(samp0, 0); \ + vec2 top_left_f = v_tex * vec2(dims) - 0.5f; \ + ivec2 top_left = ivec2(floor(top_left_f)); \ + ivec4 coords = clamp(ivec4(top_left, top_left + 1), ivec4(0), dims.xyxy - 1); \ + vec2 mix_vals = fract(top_left_f); \ + float depthTL = CONVERT_FN(texelFetch(samp0, coords.xy, 0)); \ + float depthTR = CONVERT_FN(texelFetch(samp0, coords.zy, 0)); \ + float depthBL = CONVERT_FN(texelFetch(samp0, coords.xw, 0)); \ + float depthBR = CONVERT_FN(texelFetch(samp0, coords.zw, 0)); \ + gl_FragDepth = mix(mix(depthTL, depthTR, mix_vals.x), mix(depthBL, depthBR, mix_vals.x), mix_vals.y); + +#ifdef ps_convert_rgba8_float32_biln +void ps_convert_rgba8_float32_biln() +{ + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32); +} +#endif + +#ifdef ps_convert_rgba8_float24_biln +void ps_convert_rgba8_float24_biln() +{ + // Same as above but without the alpha channel (24 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24); +} +#endif + +#ifdef ps_convert_rgba8_float16_biln +void ps_convert_rgba8_float16_biln() +{ + // Same as above but without the A/B channels (16 bits Z) + + // Convert an RGBA texture into a float depth texture + SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16); +} +#endif + +#ifdef ps_convert_rgb5a1_float16_biln +void ps_convert_rgb5a1_float16_biln() +{ + // Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z + SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16); } #endif diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 68ee25d7d5..fa31bf6859 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -23,23 +23,27 @@ const char* shaderName(ShaderConvert value) switch (value) { // clang-format off - case ShaderConvert::COPY: return "ps_copy"; - case ShaderConvert::RGBA8_TO_16_BITS: return "ps_convert_rgba8_16bits"; - case ShaderConvert::DATM_1: return "ps_datm1"; - case ShaderConvert::DATM_0: return "ps_datm0"; - case ShaderConvert::MOD_256: return "ps_mod256"; - case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency"; - case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits"; - case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits"; - case ShaderConvert::FLOAT32_TO_RGBA8: return "ps_convert_float32_rgba8"; - case ShaderConvert::FLOAT16_TO_RGB5A1: return "ps_convert_float16_rgb5a1"; - case ShaderConvert::RGBA8_TO_FLOAT32: return "ps_convert_rgba8_float32"; - case ShaderConvert::RGBA8_TO_FLOAT24: return "ps_convert_rgba8_float24"; - case ShaderConvert::RGBA8_TO_FLOAT16: return "ps_convert_rgba8_float16"; - case ShaderConvert::RGB5A1_TO_FLOAT16: return "ps_convert_rgb5a1_float16"; - case ShaderConvert::DEPTH_COPY: return "ps_depth_copy"; - case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i"; - case ShaderConvert::YUV: return "ps_yuv"; + case ShaderConvert::COPY: return "ps_copy"; + case ShaderConvert::RGBA8_TO_16_BITS: return "ps_convert_rgba8_16bits"; + case ShaderConvert::DATM_1: return "ps_datm1"; + case ShaderConvert::DATM_0: return "ps_datm0"; + case ShaderConvert::MOD_256: return "ps_mod256"; + case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency"; + case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits"; + case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits"; + case ShaderConvert::FLOAT32_TO_RGBA8: return "ps_convert_float32_rgba8"; + case ShaderConvert::FLOAT16_TO_RGB5A1: return "ps_convert_float16_rgb5a1"; + case ShaderConvert::RGBA8_TO_FLOAT32: return "ps_convert_rgba8_float32"; + case ShaderConvert::RGBA8_TO_FLOAT24: return "ps_convert_rgba8_float24"; + case ShaderConvert::RGBA8_TO_FLOAT16: return "ps_convert_rgba8_float16"; + case ShaderConvert::RGB5A1_TO_FLOAT16: return "ps_convert_rgb5a1_float16"; + case ShaderConvert::RGBA8_TO_FLOAT32_BILN: return "ps_convert_rgba8_float32_biln"; + case ShaderConvert::RGBA8_TO_FLOAT24_BILN: return "ps_convert_rgba8_float24_biln"; + case ShaderConvert::RGBA8_TO_FLOAT16_BILN: return "ps_convert_rgba8_float16_biln"; + case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln"; + case ShaderConvert::DEPTH_COPY: return "ps_depth_copy"; + case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i"; + case ShaderConvert::YUV: return "ps_yuv"; // clang-format on default: ASSERT(0); diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 99a1e32433..f573da6f56 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -45,12 +45,75 @@ enum class ShaderConvert RGBA8_TO_FLOAT24, RGBA8_TO_FLOAT16, RGB5A1_TO_FLOAT16, + RGBA8_TO_FLOAT32_BILN, + RGBA8_TO_FLOAT24_BILN, + RGBA8_TO_FLOAT16_BILN, + RGB5A1_TO_FLOAT16_BILN, DEPTH_COPY, RGBA_TO_8I, YUV, Count }; +static inline bool HasDepthOutput(ShaderConvert shader) +{ + switch (shader) + { + case ShaderConvert::RGBA8_TO_FLOAT32: + case ShaderConvert::RGBA8_TO_FLOAT24: + case ShaderConvert::RGBA8_TO_FLOAT16: + case ShaderConvert::RGB5A1_TO_FLOAT16: + case ShaderConvert::RGBA8_TO_FLOAT32_BILN: + case ShaderConvert::RGBA8_TO_FLOAT24_BILN: + case ShaderConvert::RGBA8_TO_FLOAT16_BILN: + case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: + case ShaderConvert::DEPTH_COPY: + return true; + default: + return false; + } +} + +static inline bool HasStencilOutput(ShaderConvert shader) +{ + switch (shader) + { + case ShaderConvert::DATM_0: + case ShaderConvert::DATM_1: + return true; + default: + return false; + } +} + +static inline bool SupportsNearest(ShaderConvert shader) +{ + switch (shader) + { + case ShaderConvert::RGBA8_TO_FLOAT32_BILN: + case ShaderConvert::RGBA8_TO_FLOAT24_BILN: + case ShaderConvert::RGBA8_TO_FLOAT16_BILN: + case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: + return false; + default: + return true; + } +} + +static inline bool SupportsBilinear(ShaderConvert shader) +{ + switch (shader) + { + case ShaderConvert::RGBA8_TO_FLOAT32: + case ShaderConvert::RGBA8_TO_FLOAT24: + case ShaderConvert::RGBA8_TO_FLOAT16: + case ShaderConvert::RGB5A1_TO_FLOAT16: + return false; + default: + return true; + } +} + enum class PresentShader { COPY = 0, diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 584d16aa57..286003780b 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -577,6 +577,8 @@ void GSDevice11::CloneTexture(GSTexture* src, GSTexture** dest, const GSVector4i void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear) { + pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader)); + pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast(shader)].get(), nullptr, linear); } @@ -608,11 +610,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* { ASSERT(sTex); - const bool draw_in_depth = ps == m_convert.ps[static_cast(ShaderConvert::DEPTH_COPY)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT32)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT24)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT16)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGB5A1_TO_FLOAT16)]; + const bool draw_in_depth = dTex && dTex->IsDepthStencil(); BeginScene(); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 7570359cbd..b2f38cbcd1 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -32,20 +32,6 @@ #include #include -static bool IsDepthConvertShader(ShaderConvert i) -{ - return (i == ShaderConvert::DEPTH_COPY || i == ShaderConvert::RGBA8_TO_FLOAT32 || - i == ShaderConvert::RGBA8_TO_FLOAT24 || i == ShaderConvert::RGBA8_TO_FLOAT16 || - i == ShaderConvert::RGB5A1_TO_FLOAT16 || i == ShaderConvert::DATM_0 || - i == ShaderConvert::DATM_1); -} - -static bool IsIntConvertShader(ShaderConvert i) -{ - return (i == ShaderConvert::RGBA8_TO_16_BITS || i == ShaderConvert::FLOAT32_TO_16_BITS || - i == ShaderConvert::FLOAT32_TO_32_BITS); -} - static bool IsDATMConvertShader(ShaderConvert i) { return (i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); } static D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE GetLoadOpForTexture(GSTexture12* tex) @@ -476,7 +462,7 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */) { - pxAssert(IsDepthConvertShader(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); + pxAssert(HasDepthOutput(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top), int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), @@ -1083,7 +1069,7 @@ bool GSDevice12::CompileConvertPipelines() for (ShaderConvert i = ShaderConvert::COPY; static_cast(i) < static_cast(ShaderConvert::Count); i = static_cast(static_cast(i) + 1)) { - const bool depth = IsDepthConvertShader(i); + const bool depth = HasDepthOutput(i); const int index = static_cast(i); switch (i) diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index a22ace695c..dd6995b4cb 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2516,7 +2516,7 @@ void GSTextureCache::Target::Update() GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0); // FIXME linear or not? - g_gs_device->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert::RGBA8_TO_FLOAT32); + g_gs_device->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert::RGBA8_TO_FLOAT32_BILN); } g_gs_device->Recycle(t); diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index a364392323..71ae88de84 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -809,6 +809,10 @@ bool GSDeviceMTL::Create(HostDisplay* display) case ShaderConvert::RGBA8_TO_FLOAT24: case ShaderConvert::RGBA8_TO_FLOAT16: case ShaderConvert::RGB5A1_TO_FLOAT16: + case ShaderConvert::RGBA8_TO_FLOAT32_BILN: + case ShaderConvert::RGBA8_TO_FLOAT24_BILN: + case ShaderConvert::RGBA8_TO_FLOAT16_BILN: + case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid; pdesc.depthAttachmentPixelFormat = ConvertPixelFormat(GSTexture::Format::DepthStencil); break; @@ -1051,6 +1055,9 @@ void GSDeviceMTL::RenderCopy(GSTexture* sTex, id pipelin void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear) { @autoreleasepool { id pipeline; + + pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); + if (shader == ShaderConvert::COPY) pipeline = m_convert_pipeline_copy[dTex->GetFormat() == GSTexture::Format::Color ? 0 : 1]; else diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal index a8653371fe..0931f52e5b 100644 --- a/pcsx2/GS/Renderers/Metal/convert.metal +++ b/pcsx2/GS/Renderers/Metal/convert.metal @@ -150,32 +150,95 @@ fragment DepthOut ps_depth_copy(ConvertShaderData data [[stage_in]], ConvertPSDe return res.sample(data.t); } -static float pack_rgba8_depth(float4 unorm) +static float rgba8_to_depth32(half4 unorm) { - return float(as_type(uchar4(unorm * 255.f + 0.5f))) * 0x1p-32f; + return float(as_type(uchar4(unorm * 255.5h))) * 0x1p-32f; } -fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertPSRes res) +static float rgba8_to_depth24(half4 unorm) { - return pack_rgba8_depth(res.sample(data.t)); + return rgba8_to_depth32(half4(unorm.rgb, 0)); } -fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertPSRes res) +static float rgba8_to_depth16(half4 unorm) { - // Same as above but without the alpha channel (24 bits Z) - return pack_rgba8_depth(float4(res.sample(data.t).rgb, 0)); + return float(as_type(uchar2(unorm.rg * 255.5h))) * 0x1p-32f; } -fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res) +static float rgb5a1_to_depth16(half4 unorm) { - return float(as_type(uchar2(res.sample(data.t).rg * 255.f + 0.5f))) * 0x1p-32; -} - -fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res) -{ - uint4 cu = uint4(res.sample(data.t) * 255.f + 0.5f); + uint4 cu = uint4(unorm * 255.5h); uint out = (cu.x >> 3) | ((cu.y << 2) & 0x03e0) | ((cu.z << 7) & 0x7c00) | ((cu.w << 8) & 0x8000); - return float(out) * 0x1p-32; + return float(out) * 0x1p-32f; +} + +struct ConvertToDepthRes +{ + texture2d texture [[texture(GSMTLTextureIndexNonHW)]]; + half4 sample(float2 coord) + { + // RGBA bilinear on a depth texture is a bad idea, and should never be used + // Might as well let the compiler optimize a bit by telling it exactly what sampler we'll be using here + constexpr sampler s(coord::normalized, filter::nearest, address::clamp_to_edge); + return texture.sample(s, coord); + } + + /// Manual bilinear sampling where we do the bilinear *after* rgba → depth conversion + template + float sample_biln(float2 coord) + { + uint2 dimensions = uint2(texture.get_width(), texture.get_height()); + float2 top_left_f = coord * float2(dimensions) - 0.5f; + int2 top_left = int2(floor(top_left_f)); + uint4 coords = uint4(clamp(int4(top_left, top_left + 1), 0, int2(dimensions - 1).xyxy)); + float2 mix_vals = fract(top_left_f); + + float depthTL = convert(texture.read(coords.xy)); + float depthTR = convert(texture.read(coords.zy)); + float depthBL = convert(texture.read(coords.xw)); + float depthBR = convert(texture.read(coords.zw)); + return mix(mix(depthTL, depthTR, mix_vals.x), mix(depthBL, depthBR, mix_vals.x), mix_vals.y); + } +}; + +fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return rgba8_to_depth32(res.sample(data.t)); +} + +fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return rgba8_to_depth24(res.sample(data.t)); +} + +fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return rgba8_to_depth16(res.sample(data.t)); +} + +fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return rgb5a1_to_depth16(res.sample(data.t)); +} + +fragment DepthOut ps_convert_rgba8_float32_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return res.sample_biln(data.t); +} + +fragment DepthOut ps_convert_rgba8_float24_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return res.sample_biln(data.t); +} + +fragment DepthOut ps_convert_rgba8_float16_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return res.sample_biln(data.t); +} + +fragment DepthOut ps_convert_rgb5a1_float16_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res) +{ + return res.sample_biln(data.t); } fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], ConvertPSRes res, diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index b5f2f24fac..d15c3b278f 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1204,6 +1204,8 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear) { + pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader)); + pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], linear); } @@ -1228,11 +1230,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture { ASSERT(sTex); - const bool draw_in_depth = ps == m_convert.ps[static_cast(ShaderConvert::DEPTH_COPY)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT32)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT24)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGBA8_TO_FLOAT16)] - || ps == m_convert.ps[static_cast(ShaderConvert::RGB5A1_TO_FLOAT16)]; + const bool draw_in_depth = dTex->IsDepthStencil(); // ************************************ // Init diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 2236c85a62..8399a259a5 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -35,20 +35,6 @@ static u32 s_debug_scope_depth = 0; #endif -static bool IsDepthConvertShader(ShaderConvert i) -{ - return (i == ShaderConvert::DEPTH_COPY || i == ShaderConvert::RGBA8_TO_FLOAT32 || - i == ShaderConvert::RGBA8_TO_FLOAT24 || i == ShaderConvert::RGBA8_TO_FLOAT16 || - i == ShaderConvert::RGB5A1_TO_FLOAT16 || i == ShaderConvert::DATM_0 || - i == ShaderConvert::DATM_1); -} - -static bool IsIntConvertShader(ShaderConvert i) -{ - return (i == ShaderConvert::RGBA8_TO_16_BITS || i == ShaderConvert::FLOAT32_TO_16_BITS || - i == ShaderConvert::FLOAT32_TO_32_BITS); -} - static bool IsDATMConvertShader(ShaderConvert i) { return (i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); } static VkAttachmentLoadOp GetLoadOpForTexture(GSTextureVK* tex) @@ -572,7 +558,8 @@ void GSDeviceVK::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */) { - pxAssert(IsDepthConvertShader(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); + pxAssert(HasDepthOutput(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); + pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top), int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), @@ -1351,7 +1338,7 @@ bool GSDeviceVK::CompileConvertPipelines() for (ShaderConvert i = ShaderConvert::COPY; static_cast(i) < static_cast(ShaderConvert::Count); i = static_cast(static_cast(i) + 1)) { - const bool depth = IsDepthConvertShader(i); + const bool depth = HasDepthOutput(i); const int index = static_cast(i); VkRenderPass rp;