GS: Manually do bilinear sampling when converting RGBA to depth

Bilinear filtering in the shader blends each RGBA channel separately, so it doesn't properly handle the case where r overflows into g (or g overflows into b, etc.).
This commit is contained in:
TellowKrinkle 2022-08-16 15:19:34 -05:00 committed by refractionpcsx2
parent 882c09b870
commit 91601e5647
12 changed files with 415 additions and 111 deletions

View File

@ -162,36 +162,98 @@ PS_OUTPUT ps_convert_float16_rgb5a1(PS_INPUT input)
return output;
}
// Packs an RGBA8 color into one 32-bit integer (R in the lowest byte, A in the highest)
// and returns that integer as a float scaled by 2^-32.
float rgba8_to_depth32(float4 val)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	const uint4 bytes = uint4(val * 255.5f);
	const uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16) | (bytes.a << 24);
	return float(packed) * exp2(-32.0f);
}
// Packs the R, G and B channels of an RGBA8 color into a 24-bit integer (R lowest, B highest)
// and returns that integer as a float scaled by 2^-32. The alpha channel is not read.
float rgba8_to_depth24(float4 val)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	const uint3 bytes = uint3(val.rgb * 255.5f);
	const uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16);
	return float(packed) * exp2(-32.0f);
}
// Packs the R and G channels of an RGBA8 color into a 16-bit integer (R low byte, G high byte)
// and returns that integer as a float scaled by 2^-32. Blue and alpha are not read.
float rgba8_to_depth16(float4 val)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	const uint2 bytes = uint2(val.rg * 255.5f);
	const uint packed = bytes.r | (bytes.g << 8);
	return float(packed) * exp2(-32.0f);
}
// Rebuilds a 16-bit RGB5A1 value from a color stored as RGBA8 and returns it as a float
// scaled by 2^-32. The top five bits of R, G and B land in bits 0-4, 5-9 and 10-14;
// the top bit of A lands in bit 15.
float rgb5a1_to_depth16(float4 val)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	const uint4 bytes = uint4(val * 255.5f);
	const uint red5 = (bytes.r & 0xF8u) >> 3;
	const uint green5 = (bytes.g & 0xF8u) << 2;
	const uint blue5 = (bytes.b & 0xF8u) << 7;
	const uint alpha1 = (bytes.a & 0x80u) << 8;
	return float(red5 | green5 | blue5 | alpha1) * exp2(-32.0f);
}
float ps_convert_rgba8_float32(PS_INPUT input) : SV_Depth
{
// Convert a RRGBA texture into a float depth texture
uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f);
return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
return rgba8_to_depth32(sample_c(input.t));
}
float ps_convert_rgba8_float24(PS_INPUT input) : SV_Depth
{
// Same as above but without the alpha channel (24 bits Z)
// Convert a RRGBA texture into a float depth texture
uint3 c = uint3(sample_c(input.t).rgb * 255.0f + 0.5f);
return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
return rgba8_to_depth24(sample_c(input.t));
}
float ps_convert_rgba8_float16(PS_INPUT input) : SV_Depth
{
// Same as above but without the A/B channels (16 bits Z)
// Convert a RRGBA texture into a float depth texture
uint2 c = uint2(sample_c(input.t).rg * 255.0f + 0.5f);
return float(c.r | (c.g << 8)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
return rgba8_to_depth16(sample_c(input.t));
}
float ps_convert_rgb5a1_float16(PS_INPUT input) : SV_Depth
{
// Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
uint4 c = uint4(sample_c(input.t) * 255.0f + 0.5f);
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z
return rgb5a1_to_depth16(sample_c(input.t));
}
// Function body for the *_biln pixel shaders: bilinear filtering done by hand on depth values.
// Loads the four texels around input.t with Texture.Load (integer coordinates clamped to the
// texture bounds), converts each one with CONVERT_FN, then lerps the converted depths using
// the fractional part of the texel position.
// Requires `input` and `Texture` to be in scope. The expansion ends in a return statement,
// so nothing placed after it in the function will execute.
#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \
uint width, height; \
Texture.GetDimensions(width, height); \
float2 top_left_f = input.t * float2(width, height) - 0.5f; \
int2 top_left = int2(floor(top_left_f)); \
int4 coords = clamp(int4(top_left, top_left + 1), int4(0, 0, 0, 0), int2(width - 1, height - 1).xyxy); \
float2 mix_vals = frac(top_left_f); \
float depthTL = CONVERT_FN(Texture.Load(int3(coords.xy, 0))); \
float depthTR = CONVERT_FN(Texture.Load(int3(coords.zy, 0))); \
float depthBL = CONVERT_FN(Texture.Load(int3(coords.xw, 0))); \
float depthBR = CONVERT_FN(Texture.Load(int3(coords.zw, 0))); \
return lerp(lerp(depthTL, depthTR, mix_vals.x), lerp(depthBL, depthBR, mix_vals.x), mix_vals.y);
float ps_convert_rgba8_float32_biln(PS_INPUT input) : SV_Depth
{
// Convert an RGBA texture into a float depth texture (32 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth32 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32);
}
float ps_convert_rgba8_float24_biln(PS_INPUT input) : SV_Depth
{
// Convert an RGBA texture into a float depth texture, ignoring the alpha channel (24 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth24 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24);
}
float ps_convert_rgba8_float16_biln(PS_INPUT input) : SV_Depth
{
// Convert an RGBA texture into a float depth texture, ignoring the A/B channels (16 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16);
}
float ps_convert_rgb5a1_float16_biln(PS_INPUT input) : SV_Depth
{
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z.
// Each of the four neighbouring texels is converted with rgb5a1_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16);
}
PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)

View File

@ -97,12 +97,35 @@ void ps_convert_float16_rgb5a1()
}
#endif
// Packs an RGBA8 color into one 32-bit integer (R in the lowest byte, A in the highest)
// and returns that integer as a float scaled by 2^-32.
float rgba8_to_depth32(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec4 bytes = uvec4(unorm * vec4(255.5f));
	uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16) | (bytes.a << 24);
	return float(packed) * exp2(-32.0f);
}
// Packs the R, G and B channels of an RGBA8 color into a 24-bit integer (R lowest, B highest)
// and returns that integer as a float scaled by 2^-32. The alpha channel is not read.
float rgba8_to_depth24(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec3 bytes = uvec3(unorm.rgb * vec3(255.5f));
	uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16);
	return float(packed) * exp2(-32.0f);
}
// Packs the R and G channels of an RGBA8 color into a 16-bit integer (R low byte, G high byte)
// and returns that integer as a float scaled by 2^-32. Blue and alpha are not read.
float rgba8_to_depth16(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec2 bytes = uvec2(unorm.rg * vec2(255.5f));
	uint packed = bytes.r | (bytes.g << 8);
	return float(packed) * exp2(-32.0f);
}
// Rebuilds a 16-bit RGB5A1 value from a color stored as RGBA8 and returns it as a float
// scaled by 2^-32. The top five bits of R, G and B land in bits 0-4, 5-9 and 10-14;
// the top bit of A lands in bit 15.
float rgb5a1_to_depth16(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec4 bytes = uvec4(unorm * vec4(255.5f));
	uint red5 = (bytes.r & 0xF8u) >> 3;
	uint green5 = (bytes.g & 0xF8u) << 2;
	uint blue5 = (bytes.b & 0xF8u) << 7;
	uint alpha1 = (bytes.a & 0x80u) << 8;
	return float(red5 | green5 | blue5 | alpha1) * exp2(-32.0f);
}
#ifdef ps_convert_rgba8_float32
void ps_convert_rgba8_float32()
{
// Convert a RRGBA texture into a float depth texture
uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f));
gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth32(sample_c());
}
#endif
@ -111,9 +134,8 @@ void ps_convert_rgba8_float24()
{
// Same as above but without the alpha channel (24 bits Z)
// Convert a RRGBA texture into a float depth texture
uvec3 c = uvec3(sample_c().rgb * vec3(255.0f) + vec3(0.5f));
gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth24(sample_c());
}
#endif
@ -122,18 +144,64 @@ void ps_convert_rgba8_float16()
{
// Same as above but without the A/B channels (16 bits Z)
// Convert a RRGBA texture into a float depth texture
uvec2 c = uvec2(sample_c().rg * vec2(255.0f) + vec2(0.5f));
gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth16(sample_c());
}
#endif
#ifdef ps_convert_rgb5a1_float16
void ps_convert_rgb5a1_float16()
{
// Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
uvec4 c = uvec4(sample_c() * vec4(255.0f) + vec4(0.5f));
gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z
gl_FragDepth = rgb5a1_to_depth16(sample_c());
}
#endif
// Function body for the *_biln fragment shaders: bilinear filtering done by hand on depth values.
// Fetches the four texels around PSin_t from TextureSampler with texelFetch (integer coordinates
// clamped to the texture bounds), converts each one with CONVERT_FN, then mixes the converted
// depths using the fractional part of the texel position and writes the result to gl_FragDepth.
// Requires `PSin_t` and `TextureSampler` to be in scope.
#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \
ivec2 dims = textureSize(TextureSampler, 0); \
vec2 top_left_f = PSin_t * vec2(dims) - 0.5f; \
ivec2 top_left = ivec2(floor(top_left_f)); \
ivec4 coords = clamp(ivec4(top_left, top_left + 1), ivec4(0), dims.xyxy - 1); \
vec2 mix_vals = fract(top_left_f); \
float depthTL = CONVERT_FN(texelFetch(TextureSampler, coords.xy, 0)); \
float depthTR = CONVERT_FN(texelFetch(TextureSampler, coords.zy, 0)); \
float depthBL = CONVERT_FN(texelFetch(TextureSampler, coords.xw, 0)); \
float depthBR = CONVERT_FN(texelFetch(TextureSampler, coords.zw, 0)); \
gl_FragDepth = mix(mix(depthTL, depthTR, mix_vals.x), mix(depthBL, depthBR, mix_vals.x), mix_vals.y);
#ifdef ps_convert_rgba8_float32_biln
void ps_convert_rgba8_float32_biln()
{
// Convert an RGBA texture into a float depth texture (32 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth32 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32);
}
#endif
#ifdef ps_convert_rgba8_float24_biln
void ps_convert_rgba8_float24_biln()
{
// Convert an RGBA texture into a float depth texture, ignoring the alpha channel (24 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth24 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24);
}
#endif
#ifdef ps_convert_rgba8_float16_biln
void ps_convert_rgba8_float16_biln()
{
// Convert an RGBA texture into a float depth texture, ignoring the A/B channels (16 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16);
}
#endif
#ifdef ps_convert_rgb5a1_float16_biln
void ps_convert_rgb5a1_float16_biln()
{
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z.
// Each of the four neighbouring texels is converted with rgb5a1_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16);
}
#endif

View File

@ -128,12 +128,35 @@ void ps_convert_float16_rgb5a1()
}
#endif
// Packs an RGBA8 color into one 32-bit integer (R in the lowest byte, A in the highest)
// and returns that integer as a float scaled by 2^-32.
float rgba8_to_depth32(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec4 bytes = uvec4(unorm * vec4(255.5f));
	uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16) | (bytes.a << 24);
	return float(packed) * exp2(-32.0f);
}
// Packs the R, G and B channels of an RGBA8 color into a 24-bit integer (R lowest, B highest)
// and returns that integer as a float scaled by 2^-32. The alpha channel is not read.
float rgba8_to_depth24(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec3 bytes = uvec3(unorm.rgb * vec3(255.5f));
	uint packed = bytes.r | (bytes.g << 8) | (bytes.b << 16);
	return float(packed) * exp2(-32.0f);
}
// Packs the R and G channels of an RGBA8 color into a 16-bit integer (R low byte, G high byte)
// and returns that integer as a float scaled by 2^-32. Blue and alpha are not read.
float rgba8_to_depth16(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec2 bytes = uvec2(unorm.rg * vec2(255.5f));
	uint packed = bytes.r | (bytes.g << 8);
	return float(packed) * exp2(-32.0f);
}
// Rebuilds a 16-bit RGB5A1 value from a color stored as RGBA8 and returns it as a float
// scaled by 2^-32. The top five bits of R, G and B land in bits 0-4, 5-9 and 10-14;
// the top bit of A lands in bit 15.
float rgb5a1_to_depth16(vec4 unorm)
{
	// For channel values in [0, 1], scaling by 255.5 and truncating yields an integer in 0..255.
	uvec4 bytes = uvec4(unorm * vec4(255.5f));
	uint red5 = (bytes.r & 0xF8u) >> 3;
	uint green5 = (bytes.g & 0xF8u) << 2;
	uint blue5 = (bytes.b & 0xF8u) << 7;
	uint alpha1 = (bytes.a & 0x80u) << 8;
	return float(red5 | green5 | blue5 | alpha1) * exp2(-32.0f);
}
#ifdef ps_convert_rgba8_float32
void ps_convert_rgba8_float32()
{
// Convert a RRGBA texture into a float depth texture
uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f));
gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth32(sample_c(v_tex));
}
#endif
@ -142,9 +165,8 @@ void ps_convert_rgba8_float24()
{
// Same as above but without the alpha channel (24 bits Z)
// Convert a RRGBA texture into a float depth texture
uvec3 c = uvec3(sample_c(v_tex).rgb * vec3(255.0f) + vec3(0.5f));
gl_FragDepth = float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth24(sample_c(v_tex));
}
#endif
@ -153,18 +175,64 @@ void ps_convert_rgba8_float16()
{
// Same as above but without the A/B channels (16 bits Z)
// Convert a RRGBA texture into a float depth texture
uvec2 c = uvec2(sample_c(v_tex).rg * vec2(255.0f) + vec2(0.5f));
gl_FragDepth = float(c.r | (c.g << 8)) * exp2(-32.0f);
// Convert an RGBA texture into a float depth texture
gl_FragDepth = rgba8_to_depth16(sample_c(v_tex));
}
#endif
#ifdef ps_convert_rgb5a1_float16
void ps_convert_rgb5a1_float16()
{
// Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
uvec4 c = uvec4(sample_c(v_tex) * vec4(255.0f) + vec4(0.5f));
gl_FragDepth = float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z
gl_FragDepth = rgb5a1_to_depth16(sample_c(v_tex));
}
#endif
// Function body for the *_biln fragment shaders: bilinear filtering done by hand on depth values.
// Fetches the four texels around v_tex from samp0 with texelFetch (integer coordinates clamped
// to the texture bounds), converts each one with CONVERT_FN, then mixes the converted depths
// using the fractional part of the texel position and writes the result to gl_FragDepth.
// Requires `v_tex` and `samp0` to be in scope.
#define SAMPLE_RGBA_DEPTH_BILN(CONVERT_FN) \
ivec2 dims = textureSize(samp0, 0); \
vec2 top_left_f = v_tex * vec2(dims) - 0.5f; \
ivec2 top_left = ivec2(floor(top_left_f)); \
ivec4 coords = clamp(ivec4(top_left, top_left + 1), ivec4(0), dims.xyxy - 1); \
vec2 mix_vals = fract(top_left_f); \
float depthTL = CONVERT_FN(texelFetch(samp0, coords.xy, 0)); \
float depthTR = CONVERT_FN(texelFetch(samp0, coords.zy, 0)); \
float depthBL = CONVERT_FN(texelFetch(samp0, coords.xw, 0)); \
float depthBR = CONVERT_FN(texelFetch(samp0, coords.zw, 0)); \
gl_FragDepth = mix(mix(depthTL, depthTR, mix_vals.x), mix(depthBL, depthBR, mix_vals.x), mix_vals.y);
#ifdef ps_convert_rgba8_float32_biln
void ps_convert_rgba8_float32_biln()
{
// Convert an RGBA texture into a float depth texture (32 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth32 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth32);
}
#endif
#ifdef ps_convert_rgba8_float24_biln
void ps_convert_rgba8_float24_biln()
{
// Convert an RGBA texture into a float depth texture, ignoring the alpha channel (24 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth24 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth24);
}
#endif
#ifdef ps_convert_rgba8_float16_biln
void ps_convert_rgba8_float16_biln()
{
// Convert an RGBA texture into a float depth texture, ignoring the A/B channels (16 bits Z).
// Each of the four neighbouring texels is converted with rgba8_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgba8_to_depth16);
}
#endif
#ifdef ps_convert_rgb5a1_float16_biln
void ps_convert_rgb5a1_float16_biln()
{
// Convert an RGB5A1 (saved as RGBA8) color to a 16 bit Z.
// Each of the four neighbouring texels is converted with rgb5a1_to_depth16 first,
// and the resulting depths are then blended bilinearly.
SAMPLE_RGBA_DEPTH_BILN(rgb5a1_to_depth16);
}
#endif

View File

@ -23,23 +23,27 @@ const char* shaderName(ShaderConvert value)
switch (value)
{
// clang-format off
case ShaderConvert::COPY: return "ps_copy";
case ShaderConvert::RGBA8_TO_16_BITS: return "ps_convert_rgba8_16bits";
case ShaderConvert::DATM_1: return "ps_datm1";
case ShaderConvert::DATM_0: return "ps_datm0";
case ShaderConvert::MOD_256: return "ps_mod256";
case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency";
case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_RGBA8: return "ps_convert_float32_rgba8";
case ShaderConvert::FLOAT16_TO_RGB5A1: return "ps_convert_float16_rgb5a1";
case ShaderConvert::RGBA8_TO_FLOAT32: return "ps_convert_rgba8_float32";
case ShaderConvert::RGBA8_TO_FLOAT24: return "ps_convert_rgba8_float24";
case ShaderConvert::RGBA8_TO_FLOAT16: return "ps_convert_rgba8_float16";
case ShaderConvert::RGB5A1_TO_FLOAT16: return "ps_convert_rgb5a1_float16";
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
case ShaderConvert::YUV: return "ps_yuv";
case ShaderConvert::COPY: return "ps_copy";
case ShaderConvert::RGBA8_TO_16_BITS: return "ps_convert_rgba8_16bits";
case ShaderConvert::DATM_1: return "ps_datm1";
case ShaderConvert::DATM_0: return "ps_datm0";
case ShaderConvert::MOD_256: return "ps_mod256";
case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency";
case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_RGBA8: return "ps_convert_float32_rgba8";
case ShaderConvert::FLOAT16_TO_RGB5A1: return "ps_convert_float16_rgb5a1";
case ShaderConvert::RGBA8_TO_FLOAT32: return "ps_convert_rgba8_float32";
case ShaderConvert::RGBA8_TO_FLOAT24: return "ps_convert_rgba8_float24";
case ShaderConvert::RGBA8_TO_FLOAT16: return "ps_convert_rgba8_float16";
case ShaderConvert::RGB5A1_TO_FLOAT16: return "ps_convert_rgb5a1_float16";
case ShaderConvert::RGBA8_TO_FLOAT32_BILN: return "ps_convert_rgba8_float32_biln";
case ShaderConvert::RGBA8_TO_FLOAT24_BILN: return "ps_convert_rgba8_float24_biln";
case ShaderConvert::RGBA8_TO_FLOAT16_BILN: return "ps_convert_rgba8_float16_biln";
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
case ShaderConvert::YUV: return "ps_yuv";
// clang-format on
default:
ASSERT(0);

View File

@ -45,12 +45,75 @@ enum class ShaderConvert
RGBA8_TO_FLOAT24,
RGBA8_TO_FLOAT16,
RGB5A1_TO_FLOAT16,
RGBA8_TO_FLOAT32_BILN,
RGBA8_TO_FLOAT24_BILN,
RGBA8_TO_FLOAT16_BILN,
RGB5A1_TO_FLOAT16_BILN,
DEPTH_COPY,
RGBA_TO_8I,
YUV,
Count
};
/// Reports whether `shader` is one of the convert shaders classified as having depth output:
/// the RGBA8/RGB5A1 -> float conversions (plain and _BILN variants) and DEPTH_COPY.
/// Every other shader yields false.
static inline bool HasDepthOutput(ShaderConvert shader)
{
	const bool is_plain_depth_convert =
		shader == ShaderConvert::RGBA8_TO_FLOAT32 ||
		shader == ShaderConvert::RGBA8_TO_FLOAT24 ||
		shader == ShaderConvert::RGBA8_TO_FLOAT16 ||
		shader == ShaderConvert::RGB5A1_TO_FLOAT16;

	const bool is_biln_depth_convert =
		shader == ShaderConvert::RGBA8_TO_FLOAT32_BILN ||
		shader == ShaderConvert::RGBA8_TO_FLOAT24_BILN ||
		shader == ShaderConvert::RGBA8_TO_FLOAT16_BILN ||
		shader == ShaderConvert::RGB5A1_TO_FLOAT16_BILN;

	return is_plain_depth_convert || is_biln_depth_convert || shader == ShaderConvert::DEPTH_COPY;
}
/// Reports whether `shader` is one of the convert shaders classified as having stencil output,
/// i.e. DATM_0 or DATM_1. Every other shader yields false.
static inline bool HasStencilOutput(ShaderConvert shader)
{
	return shader == ShaderConvert::DATM_0 || shader == ShaderConvert::DATM_1;
}
/// Reports whether `shader` may be used with nearest sampling.
/// Only the four *_BILN depth conversion shaders are excluded; everything else yields true.
static inline bool SupportsNearest(ShaderConvert shader)
{
	const bool is_biln_depth_convert =
		shader == ShaderConvert::RGBA8_TO_FLOAT32_BILN ||
		shader == ShaderConvert::RGBA8_TO_FLOAT24_BILN ||
		shader == ShaderConvert::RGBA8_TO_FLOAT16_BILN ||
		shader == ShaderConvert::RGB5A1_TO_FLOAT16_BILN;

	return !is_biln_depth_convert;
}
/// Reports whether `shader` may be used with bilinear sampling.
/// Only the four non-_BILN RGBA8/RGB5A1 -> float conversion shaders are excluded;
/// everything else yields true.
static inline bool SupportsBilinear(ShaderConvert shader)
{
	const bool is_plain_depth_convert =
		shader == ShaderConvert::RGBA8_TO_FLOAT32 ||
		shader == ShaderConvert::RGBA8_TO_FLOAT24 ||
		shader == ShaderConvert::RGBA8_TO_FLOAT16 ||
		shader == ShaderConvert::RGB5A1_TO_FLOAT16;

	return !is_plain_depth_convert;
}
enum class PresentShader
{
COPY = 0,

View File

@ -577,6 +577,8 @@ void GSDevice11::CloneTexture(GSTexture* src, GSTexture** dest, const GSVector4i
void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
{
pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader));
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), nullptr, linear);
}
@ -608,11 +610,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
{
ASSERT(sTex);
const bool draw_in_depth = ps == m_convert.ps[static_cast<int>(ShaderConvert::DEPTH_COPY)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT32)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT24)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT16)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGB5A1_TO_FLOAT16)];
const bool draw_in_depth = dTex && dTex->IsDepthStencil();
BeginScene();

View File

@ -32,20 +32,6 @@
#include <sstream>
#include <limits>
static bool IsDepthConvertShader(ShaderConvert i)
{
return (i == ShaderConvert::DEPTH_COPY || i == ShaderConvert::RGBA8_TO_FLOAT32 ||
i == ShaderConvert::RGBA8_TO_FLOAT24 || i == ShaderConvert::RGBA8_TO_FLOAT16 ||
i == ShaderConvert::RGB5A1_TO_FLOAT16 || i == ShaderConvert::DATM_0 ||
i == ShaderConvert::DATM_1);
}
static bool IsIntConvertShader(ShaderConvert i)
{
return (i == ShaderConvert::RGBA8_TO_16_BITS || i == ShaderConvert::FLOAT32_TO_16_BITS ||
i == ShaderConvert::FLOAT32_TO_32_BITS);
}
static bool IsDATMConvertShader(ShaderConvert i) { return (i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); }
static D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE GetLoadOpForTexture(GSTexture12* tex)
@ -476,7 +462,7 @@ void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */)
{
pxAssert(IsDepthConvertShader(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil));
pxAssert(HasDepthOutput(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil));
GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top),
int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top),
@ -1083,7 +1069,7 @@ bool GSDevice12::CompileConvertPipelines()
for (ShaderConvert i = ShaderConvert::COPY; static_cast<int>(i) < static_cast<int>(ShaderConvert::Count);
i = static_cast<ShaderConvert>(static_cast<int>(i) + 1))
{
const bool depth = IsDepthConvertShader(i);
const bool depth = HasDepthOutput(i);
const int index = static_cast<int>(i);
switch (i)

View File

@ -2516,7 +2516,7 @@ void GSTextureCache::Target::Update()
GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0);
// FIXME linear or not?
g_gs_device->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert::RGBA8_TO_FLOAT32);
g_gs_device->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert::RGBA8_TO_FLOAT32_BILN);
}
g_gs_device->Recycle(t);

View File

@ -809,6 +809,10 @@ bool GSDeviceMTL::Create(HostDisplay* display)
case ShaderConvert::RGBA8_TO_FLOAT24:
case ShaderConvert::RGBA8_TO_FLOAT16:
case ShaderConvert::RGB5A1_TO_FLOAT16:
case ShaderConvert::RGBA8_TO_FLOAT32_BILN:
case ShaderConvert::RGBA8_TO_FLOAT24_BILN:
case ShaderConvert::RGBA8_TO_FLOAT16_BILN:
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN:
pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid;
pdesc.depthAttachmentPixelFormat = ConvertPixelFormat(GSTexture::Format::DepthStencil);
break;
@ -1051,6 +1055,9 @@ void GSDeviceMTL::RenderCopy(GSTexture* sTex, id<MTLRenderPipelineState> pipelin
void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
{ @autoreleasepool {
id<MTLRenderPipelineState> pipeline;
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
if (shader == ShaderConvert::COPY)
pipeline = m_convert_pipeline_copy[dTex->GetFormat() == GSTexture::Format::Color ? 0 : 1];
else

View File

@ -150,32 +150,95 @@ fragment DepthOut ps_depth_copy(ConvertShaderData data [[stage_in]], ConvertPSDe
return res.sample(data.t);
}
static float pack_rgba8_depth(float4 unorm)
static float rgba8_to_depth32(half4 unorm)
{
return float(as_type<uint>(uchar4(unorm * 255.f + 0.5f))) * 0x1p-32f;
return float(as_type<uint>(uchar4(unorm * 255.5h))) * 0x1p-32f;
}
fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertPSRes res)
static float rgba8_to_depth24(half4 unorm)
{
return pack_rgba8_depth(res.sample(data.t));
return rgba8_to_depth32(half4(unorm.rgb, 0));
}
fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertPSRes res)
static float rgba8_to_depth16(half4 unorm)
{
// Same as above but without the alpha channel (24 bits Z)
return pack_rgba8_depth(float4(res.sample(data.t).rgb, 0));
return float(as_type<ushort>(uchar2(unorm.rg * 255.5h))) * 0x1p-32f;
}
fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res)
static float rgb5a1_to_depth16(half4 unorm)
{
return float(as_type<ushort>(uchar2(res.sample(data.t).rg * 255.f + 0.5f))) * 0x1p-32;
}
fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
uint4 cu = uint4(res.sample(data.t) * 255.f + 0.5f);
uint4 cu = uint4(unorm * 255.5h);
uint out = (cu.x >> 3) | ((cu.y << 2) & 0x03e0) | ((cu.z << 7) & 0x7c00) | ((cu.w << 8) & 0x8000);
return float(out) * 0x1p-32;
return float(out) * 0x1p-32f;
}
/// Source texture binding for the RGBA -> depth conversion shaders, together with the two
/// ways those shaders read it: a fixed nearest sample, or a hand-written bilinear blend.
struct ConvertToDepthRes
{
	texture2d<half> texture [[texture(GSMTLTextureIndexNonHW)]];

	/// Reads the source texture at a normalized coordinate using nearest filtering.
	half4 sample(float2 coord)
	{
		// Bilinear filtering of the raw RGBA is never wanted here, so the sampler is hard-coded
		// to nearest; declaring it constexpr tells the compiler exactly which sampler is in use.
		constexpr sampler nearest_clamp(coord::normalized, filter::nearest, address::clamp_to_edge);
		return texture.sample(nearest_clamp, coord);
	}

	/// Bilinear filtering done by hand: the four surrounding texels are each run through
	/// `convert` (rgba -> depth) first, and only the resulting depths are blended.
	template <float (&convert)(half4)>
	float sample_biln(float2 coord)
	{
		const uint2 size = uint2(texture.get_width(), texture.get_height());
		const float2 texel_pos = coord * float2(size) - 0.5f;
		const int2 base = int2(floor(texel_pos));
		// xy = top-left texel, zw = bottom-right texel, both clamped to the texture bounds
		const uint4 px = uint4(clamp(int4(base, base + 1), 0, int2(size - 1).xyxy));
		const float2 weights = fract(texel_pos);

		const float top_left = convert(texture.read(px.xy));
		const float top_right = convert(texture.read(px.zy));
		const float bottom_left = convert(texture.read(px.xw));
		const float bottom_right = convert(texture.read(px.zw));

		const float top = mix(top_left, top_right, weights.x);
		const float bottom = mix(bottom_left, bottom_right, weights.x);
		return mix(top, bottom, weights.y);
	}
};
/// Converts a nearest-sampled RGBA texel to depth using all four channels (32 bits Z).
fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const half4 texel = res.sample(data.t);
	return rgba8_to_depth32(texel);
}
/// Converts a nearest-sampled RGBA texel to depth with rgba8_to_depth24 (24 bits Z).
fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const half4 texel = res.sample(data.t);
	return rgba8_to_depth24(texel);
}
/// Converts a nearest-sampled RGBA texel to depth with rgba8_to_depth16 (16 bits Z).
fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const half4 texel = res.sample(data.t);
	return rgba8_to_depth16(texel);
}
/// Converts a nearest-sampled RGB5A1 (stored as RGBA8) texel to a 16 bit Z.
fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const half4 texel = res.sample(data.t);
	return rgb5a1_to_depth16(texel);
}
/// Bilinear RGBA -> 32 bit depth: texels are converted with rgba8_to_depth32 before blending.
fragment DepthOut ps_convert_rgba8_float32_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const float depth = res.sample_biln<rgba8_to_depth32>(data.t);
	return depth;
}
/// Bilinear RGBA -> 24 bit depth: texels are converted with rgba8_to_depth24 before blending.
fragment DepthOut ps_convert_rgba8_float24_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const float depth = res.sample_biln<rgba8_to_depth24>(data.t);
	return depth;
}
/// Bilinear RGBA -> 16 bit depth: texels are converted with rgba8_to_depth16 before blending.
fragment DepthOut ps_convert_rgba8_float16_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const float depth = res.sample_biln<rgba8_to_depth16>(data.t);
	return depth;
}
/// Bilinear RGB5A1 (stored as RGBA8) -> 16 bit depth: texels are converted with
/// rgb5a1_to_depth16 before blending.
fragment DepthOut ps_convert_rgb5a1_float16_biln(ConvertShaderData data [[stage_in]], ConvertToDepthRes res)
{
	const float depth = res.sample_biln<rgb5a1_to_depth16>(data.t);
	return depth;
}
fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], ConvertPSRes res,

View File

@ -1204,6 +1204,8 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
{
pxAssert(dTex->IsDepthStencil() == HasDepthOutput(shader));
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], linear);
}
@ -1228,11 +1230,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
{
ASSERT(sTex);
const bool draw_in_depth = ps == m_convert.ps[static_cast<int>(ShaderConvert::DEPTH_COPY)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT32)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT24)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGBA8_TO_FLOAT16)]
|| ps == m_convert.ps[static_cast<int>(ShaderConvert::RGB5A1_TO_FLOAT16)];
const bool draw_in_depth = dTex->IsDepthStencil();
// ************************************
// Init

View File

@ -35,20 +35,6 @@
static u32 s_debug_scope_depth = 0;
#endif
static bool IsDepthConvertShader(ShaderConvert i)
{
return (i == ShaderConvert::DEPTH_COPY || i == ShaderConvert::RGBA8_TO_FLOAT32 ||
i == ShaderConvert::RGBA8_TO_FLOAT24 || i == ShaderConvert::RGBA8_TO_FLOAT16 ||
i == ShaderConvert::RGB5A1_TO_FLOAT16 || i == ShaderConvert::DATM_0 ||
i == ShaderConvert::DATM_1);
}
static bool IsIntConvertShader(ShaderConvert i)
{
return (i == ShaderConvert::RGBA8_TO_16_BITS || i == ShaderConvert::FLOAT32_TO_16_BITS ||
i == ShaderConvert::FLOAT32_TO_32_BITS);
}
static bool IsDATMConvertShader(ShaderConvert i) { return (i == ShaderConvert::DATM_0 || i == ShaderConvert::DATM_1); }
static VkAttachmentLoadOp GetLoadOpForTexture(GSTextureVK* tex)
@ -572,7 +558,8 @@ void GSDeviceVK::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r,
void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */)
{
pxAssert(IsDepthConvertShader(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil));
pxAssert(HasDepthOutput(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil));
pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader));
GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top),
int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top),
@ -1351,7 +1338,7 @@ bool GSDeviceVK::CompileConvertPipelines()
for (ShaderConvert i = ShaderConvert::COPY; static_cast<int>(i) < static_cast<int>(ShaderConvert::Count);
i = static_cast<ShaderConvert>(static_cast<int>(i) + 1))
{
const bool depth = IsDepthConvertShader(i);
const bool depth = HasDepthOutput(i);
const int index = static_cast<int>(i);
VkRenderPass rp;