GS:HW: Use 16-bit unorm for HDR

This commit is contained in:
TellowKrinkle 2022-10-09 00:51:41 -05:00 committed by lightningterror
parent 4a7539cd06
commit 9b5dd92dad
16 changed files with 52 additions and 81 deletions

View File

@ -125,7 +125,7 @@ PS_OUTPUT ps_hdr_init(PS_INPUT input)
{
PS_OUTPUT output;
float4 value = sample_c(input.t);
output.c = float4(round(value.rgb * 255), value.a);
output.c = float4(round(value.rgb * 255) / 65535, value.a);
return output;
}
@ -133,7 +133,7 @@ PS_OUTPUT ps_hdr_resolve(PS_INPUT input)
{
PS_OUTPUT output;
float4 value = sample_c(input.t);
output.c = float4(float3(int3(value.rgb) & 255) / 255, value.a);
output.c = float4(float3(uint3(value.rgb * 65535.5) & 255) / 255, value.a);
return output;
}

View File

@ -715,7 +715,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy)
if (PS_FBMASK)
{
float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
C = (float4)(((uint4)(int4)C & (FbMask ^ 0xFF)) | ((uint4)RT & FbMask));
C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask));
}
}
@ -736,18 +736,9 @@ void ps_dither(inout float3 C, float2 pos_xy)
void ps_color_clamp_wrap(inout float3 C)
{
if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR
{
int3 color = int3(C);
if (PS_DFMT == FMT_16)
color &= (int3)0xF8;
// -128 to 127 gives us longer before we run out of float precision
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
C = float3((color << 24) >> 24);
}
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
// so we need to limit the color depth on dithered items
else if (SW_BLEND || PS_DITHER || PS_FBMASK)
if (SW_BLEND || PS_DITHER || PS_FBMASK)
{
// Standard Clamp
if (PS_COLCLIP == 0 && PS_HDR == 0)
@ -756,7 +747,7 @@ void ps_color_clamp_wrap(inout float3 C)
// In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania
if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
C = (float3)((int3)C & (int3)0xF8);
else if (PS_COLCLIP == 1 && PS_HDR == 0)
else if (PS_COLCLIP == 1 || PS_HDR == 1)
C = (float3)((int3)C & (int3)0xFF);
}
}
@ -952,7 +943,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
ps_fbmask(C, input.p.xy);
#if !PS_NO_COLOR
output.c0 = PS_HDR ? float4(C.rgb, C.a / 255.0f) : C / 255.0f;
output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f;
#if !PS_NO_COLOR1
output.c1 = (float4)(alpha_blend);
#endif

View File

@ -322,7 +322,7 @@ void ps_datm0()
void ps_hdr_init()
{
vec4 value = sample_c();
SV_Target0 = vec4(round(value.rgb * 255.0f), value.a);
SV_Target0 = vec4(round(value.rgb * 255.0f) / 65535.0f, value.a);
}
#endif
@ -330,7 +330,7 @@ void ps_hdr_init()
// Resolve pass for the HDR (colclip) target: recover the integer stored in each
// 16-bit unorm colour channel, keep its low 8 bits, and write that back out as a
// normalized 8-bit colour. Alpha is passed through unchanged.
void ps_hdr_resolve()
{
vec4 value = sample_c();
SV_Target0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a);
// Scale by 65535.5 rather than 65535.0: the float -> uint conversion truncates,
// so a sampled k/65535 that lands a hair below k after the multiply would
// otherwise become k-1 and corrupt the low 8 bits. The extra half step keeps
// every stored value (including 65535) on its own integer, matching the other
// renderers' resolve shaders.
SV_Target0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a);
}
#endif

View File

@ -620,7 +620,7 @@ void ps_fbmask(inout vec4 C)
// FIXME do I need special case for 16 bits
#if PS_FBMASK
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask));
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
#endif
}
@ -638,18 +638,9 @@ void ps_dither(inout vec3 C)
void ps_color_clamp_wrap(inout vec3 C)
{
#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates accumulation blend under HDR
ivec3 color = ivec3(C);
#if PS_DFMT == FMT_16
color &= 0xF8;
#endif
// -128 to 127 gives us longer before we run out of float precision
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
C = vec3((color << 24) >> 24);
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
// so we need to limit the color depth on dithered items
#elif SW_BLEND || PS_DITHER || PS_FBMASK
#if SW_BLEND || PS_DITHER || PS_FBMASK
// Correct the Color value based on the output format
#if PS_COLCLIP == 0 && PS_HDR == 0
@ -666,7 +657,7 @@ void ps_color_clamp_wrap(inout vec3 C)
#if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C = vec3(ivec3(C) & ivec3(0xF8));
#elif PS_COLCLIP == 1 && PS_HDR == 0
#elif PS_COLCLIP == 1 || PS_HDR == 1
C = vec3(ivec3(C) & ivec3(0xFF));
#endif
@ -934,8 +925,8 @@ void ps_main()
ps_fbmask(C);
#if !PS_NO_COLOR
#if PS_HDR
SV_Target0 = vec4(C.rgb, C.a / 255.0f);
#if PS_HDR == 1
SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
#else
SV_Target0 = C / 255.0f;
#endif

View File

@ -94,7 +94,7 @@ void ps_datm0()
void ps_hdr_init()
{
vec4 value = sample_c(v_tex);
o_col0 = vec4(roundEven(value.rgb * 255.0f), value.a);
o_col0 = vec4(roundEven(value.rgb * 255.0f) / 65535.0f, value.a);
}
#endif
@ -102,7 +102,7 @@ void ps_hdr_init()
void ps_hdr_resolve()
{
vec4 value = sample_c(v_tex);
o_col0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a);
o_col0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a);
}
#endif

View File

@ -946,7 +946,7 @@ void ps_fbmask(inout vec4 C)
{
#if PS_FBMASK
vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f);
C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask));
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
#endif
}
@ -967,18 +967,9 @@ void ps_dither(inout vec3 C)
void ps_color_clamp_wrap(inout vec3 C)
{
#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates accumulation blend under HDR
ivec3 color = ivec3(C);
#if PS_DFMT == FMT_16
color &= 0xF8;
#endif
// -128 to 127 gives us longer before we run out of float precision
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
C = vec3((color << 24) >> 24);
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
// so we need to limit the color depth on dithered items
#elif SW_BLEND || PS_DITHER || PS_FBMASK
#if SW_BLEND || PS_DITHER || PS_FBMASK
// Correct the Color value based on the output format
#if PS_COLCLIP == 0 && PS_HDR == 0
@ -995,7 +986,7 @@ void ps_color_clamp_wrap(inout vec3 C)
#if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C = vec3(ivec3(C) & ivec3(0xF8));
#elif PS_COLCLIP == 1 && PS_HDR == 0
#elif PS_COLCLIP == 1 || PS_HDR == 1
C = vec3(ivec3(C) & ivec3(0xFF));
#endif
@ -1235,8 +1226,8 @@ void main()
ps_fbmask(C);
#if !PS_NO_COLOR
#if PS_HDR
o_col0 = vec4(C.rgb, C.a / 255.0f);
#if PS_HDR == 1
o_col0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
#else
o_col0 = C / 255.0f;
#endif

View File

@ -84,7 +84,7 @@ u32 GSTexture::GetCompressedBytesPerBlock() const
static constexpr u32 bytes_per_block[] = {
1, // Invalid
4, // Color/RGBA8
16, // HDRColor/RGBA32F
8, // HDRColor/RGBA16
32, // DepthStencil
1, // UNorm8/R8
2, // UInt16/R16UI

View File

@ -39,7 +39,7 @@ public:
{
Invalid = 0, ///< Used for initialization
Color, ///< Standard (RGBA8) color texture
HDRColor, ///< Float-based color texture for colclip emulation (RGBA32F)
HDRColor, ///< Color texture with more bits for colclip emulation (RGBA16Unorm)
DepthStencil, ///< Depth stencil texture
UNorm8, ///< A8UNorm texture for paletted textures and the OSD font
UInt16, ///< UInt16 texture for reading back 16-bit depth

View File

@ -465,7 +465,7 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height
switch (format)
{
case GSTexture::Format::Color: dxformat = DXGI_FORMAT_R8G8B8A8_UNORM; break;
case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R16G16B16A16_UNORM; break;
case GSTexture::Format::DepthStencil: dxformat = DXGI_FORMAT_R32G8X24_TYPELESS; break;
case GSTexture::Format::UNorm8: dxformat = DXGI_FORMAT_A8_UNORM; break;
case GSTexture::Format::UInt16: dxformat = DXGI_FORMAT_R16_UINT; break;

View File

@ -284,7 +284,7 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f
static constexpr std::array<std::array<DXGI_FORMAT, 4>, static_cast<int>(GSTexture::Format::BC7) + 1> s_format_mapping = {{
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // Invalid
{DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN}, // Color
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN}, // HDRColor
{DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_UNKNOWN}, // HDRColor
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_D32_FLOAT_S8X24_UINT}, // DepthStencil
{DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN}, // UNorm8
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_UNKNOWN}, // UInt16
@ -1161,7 +1161,7 @@ bool GSDevice12::CompileConvertPipelines()
{
pxAssert(!arr[ds]);
gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM);
gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R16G16B16A16_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM);
gpb.SetDepthStencilFormat(ds ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN);
arr[ds] = gpb.Create(g_d3d12_context->GetDevice(), m_shader_cache, false);
if (!arr[ds])

View File

@ -2767,7 +2767,6 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
// A fast algo that requires 2 passes
GL_INS("COLCLIP Fast HDR mode ENABLED");
m_conf.ps.hdr = 1;
m_conf.ps.colclip = accumulation_blend; // reuse as a flag for accumulation blend
blend_mix = false;
sw_blending = true; // Enable sw blending for the HDR algo
}
@ -2865,13 +2864,23 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
m_conf.ps.blend_d = 2;
}
if (m_conf.ps.blend_a == 2)
if (blend.op == GSDevice::OP_REV_SUBTRACT)
{
// The blend unit does a reverse subtraction so it means
// the shader must output a positive value.
// Replace 0 - Cs by Cs - 0
m_conf.ps.blend_a = m_conf.ps.blend_b;
m_conf.ps.blend_b = 2;
ASSERT(m_conf.ps.blend_a == 2);
if (m_conf.ps.hdr)
{
// HDR uses unorm, which is always positive
// Have the shader do the inversion, then clip to remove the negative
m_conf.blend.op = GSDevice::OP_ADD;
}
else
{
// The blend unit does a reverse subtraction so it means
// the shader must output a positive value.
// Replace 0 - Cs by Cs - 0
m_conf.ps.blend_a = m_conf.ps.blend_b;
m_conf.ps.blend_b = 2;
}
}
// Dual source output not needed (accumulation blend replaces it with ONE).

View File

@ -374,7 +374,7 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
case GSTexture::Format::UInt16: return MTLPixelFormatR16Uint;
case GSTexture::Format::UNorm8: return MTLPixelFormatA8Unorm;
case GSTexture::Format::Color: return MTLPixelFormatRGBA8Unorm;
case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA32Float;
case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA16Unorm;
case GSTexture::Format::DepthStencil: return MTLPixelFormatDepth32Float_Stencil8;
case GSTexture::Format::Invalid: return MTLPixelFormatInvalid;
case GSTexture::Format::BC1: return MTLPixelFormatBC1_RGBA;

View File

@ -111,16 +111,16 @@ fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn<
return tex.read(p).a < (127.5f / 255.f) ? -1 : FLT_MAX;
}
fragment half4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<half> tex)
fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex)
{
half4 in = tex.read(p);
return half4(round(in.rgb * 255.h), in.a);
float4 in = tex.read(p);
return float4(round(in.rgb * 255.f) / 65535.f, in.a);
}
fragment float4 ps_hdr_resolve(float4 p [[position]], DirectReadTextureIn<float> tex)
{
float4 in = tex.read(p);
return float4(float3(int3(in.rgb) & 255) / 255.f, in.a);
return float4(float3(uint3(in.rgb * 65535.5f) & 255) / 255.f, in.a);
}
fragment float4 ps_filter_transparency(ConvertShaderData data [[stage_in]], ConvertPSRes res)

View File

@ -784,17 +784,6 @@ struct PSMain
void ps_color_clamp_wrap(thread float4& C)
{
if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR
{
int3 color = int3(C.rgb);
if (PS_DFMT == FMT_16)
color &= 0xF8;
// -128 to 127 gives us longer before we run out of float precision
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
C.rgb = float3(char3(color));
return;
}
// When dithering the bottom 3 bits become meaningless and cause lines in the picture so we need to limit the color depth on dithered items
if (!SW_BLEND && !PS_DITHER && !PS_FBMASK)
return;
@ -812,7 +801,7 @@ struct PSMain
if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C.rgb = float3(short3(C.rgb) & 0xF8);
else if (PS_COLCLIP && !PS_HDR)
else if (PS_COLCLIP || PS_HDR)
C.rgb = float3(short3(C.rgb) & 0xFF);
}
@ -989,7 +978,7 @@ struct PSMain
ps_fbmask(C);
if (PS_COLOR0)
out.c0 = PS_HDR ? float4(C.rgb, C.a / 255.f) : C / 255.f;
out.c0 = PS_HDR ? float4(C.rgb / 65535.f, C.a / 255.f) : C / 255.f;
if (PS_COLOR0 && PS_ONLY_ALPHA)
out.c0.rgb = 0;
if (PS_COLOR1)

View File

@ -225,10 +225,10 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
// 4 channel 16-bit unorm (previously 4 channel 32-bit float)
case Format::HDRColor:
gl_fmt = GL_RGBA32F;
gl_fmt = GL_RGBA16;
m_int_format = GL_RGBA;
m_int_type = GL_FLOAT;
m_int_shift = 4;
m_int_type = GL_UNSIGNED_SHORT;
m_int_shift = 3;
break;
// Depth buffer

View File

@ -378,7 +378,7 @@ VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const
static constexpr std::array<VkFormat, static_cast<int>(GSTexture::Format::BC7) + 1> s_format_mapping = {{
VK_FORMAT_UNDEFINED, // Invalid
VK_FORMAT_R8G8B8A8_UNORM, // Color
VK_FORMAT_R32G32B32A32_SFLOAT, // HDRColor
VK_FORMAT_R16G16B16A16_UNORM, // HDRColor
VK_FORMAT_D32_SFLOAT_S8_UINT, // DepthStencil
VK_FORMAT_R8_UNORM, // UNorm8
VK_FORMAT_R16_UINT, // UInt16