mirror of https://github.com/PCSX2/pcsx2.git
GS:HW: Use 16-bit unorm for HDR
This commit is contained in:
parent
4a7539cd06
commit
9b5dd92dad
|
@ -125,7 +125,7 @@ PS_OUTPUT ps_hdr_init(PS_INPUT input)
|
|||
{
|
||||
PS_OUTPUT output;
|
||||
float4 value = sample_c(input.t);
|
||||
output.c = float4(round(value.rgb * 255), value.a);
|
||||
output.c = float4(round(value.rgb * 255) / 65535, value.a);
|
||||
return output;
|
||||
}
|
||||
|
||||
|
@ -133,7 +133,7 @@ PS_OUTPUT ps_hdr_resolve(PS_INPUT input)
|
|||
{
|
||||
PS_OUTPUT output;
|
||||
float4 value = sample_c(input.t);
|
||||
output.c = float4(float3(int3(value.rgb) & 255) / 255, value.a);
|
||||
output.c = float4(float3(uint3(value.rgb * 65535.5) & 255) / 255, value.a);
|
||||
return output;
|
||||
}
|
||||
|
||||
|
|
|
@ -715,7 +715,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy)
|
|||
if (PS_FBMASK)
|
||||
{
|
||||
float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
|
||||
C = (float4)(((uint4)(int4)C & (FbMask ^ 0xFF)) | ((uint4)RT & FbMask));
|
||||
C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -736,18 +736,9 @@ void ps_dither(inout float3 C, float2 pos_xy)
|
|||
|
||||
void ps_color_clamp_wrap(inout float3 C)
|
||||
{
|
||||
if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR
|
||||
{
|
||||
int3 color = int3(C);
|
||||
if (PS_DFMT == FMT_16)
|
||||
color &= (int3)0xF8;
|
||||
// -128 to 127 gives us longer before we run out of float precision
|
||||
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
|
||||
C = float3((color << 24) >> 24);
|
||||
}
|
||||
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
|
||||
// so we need to limit the color depth on dithered items
|
||||
else if (SW_BLEND || PS_DITHER || PS_FBMASK)
|
||||
if (SW_BLEND || PS_DITHER || PS_FBMASK)
|
||||
{
|
||||
// Standard Clamp
|
||||
if (PS_COLCLIP == 0 && PS_HDR == 0)
|
||||
|
@ -756,7 +747,7 @@ void ps_color_clamp_wrap(inout float3 C)
|
|||
// In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania
|
||||
if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
|
||||
C = (float3)((int3)C & (int3)0xF8);
|
||||
else if (PS_COLCLIP == 1 && PS_HDR == 0)
|
||||
else if (PS_COLCLIP == 1 || PS_HDR == 1)
|
||||
C = (float3)((int3)C & (int3)0xFF);
|
||||
}
|
||||
}
|
||||
|
@ -952,7 +943,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||
ps_fbmask(C, input.p.xy);
|
||||
|
||||
#if !PS_NO_COLOR
|
||||
output.c0 = PS_HDR ? float4(C.rgb, C.a / 255.0f) : C / 255.0f;
|
||||
output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f;
|
||||
#if !PS_NO_COLOR1
|
||||
output.c1 = (float4)(alpha_blend);
|
||||
#endif
|
||||
|
|
|
@ -322,7 +322,7 @@ void ps_datm0()
|
|||
void ps_hdr_init()
|
||||
{
|
||||
vec4 value = sample_c();
|
||||
SV_Target0 = vec4(round(value.rgb * 255.0f), value.a);
|
||||
SV_Target0 = vec4(round(value.rgb * 255.0f) / 65535.0f, value.a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -330,7 +330,7 @@ void ps_hdr_init()
|
|||
void ps_hdr_resolve()
|
||||
{
|
||||
vec4 value = sample_c();
|
||||
SV_Target0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a);
|
||||
// Bias by 0.5 before the truncating uint conversion so float error in the
// unorm16 -> float -> integer round-trip cannot drop the value by one before
// it is wrapped to 8 bits with & 255u.
SV_Target0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -620,7 +620,7 @@ void ps_fbmask(inout vec4 C)
|
|||
// FIXME do I need special case for 16 bits
|
||||
#if PS_FBMASK
|
||||
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
|
||||
C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask));
|
||||
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -638,18 +638,9 @@ void ps_dither(inout vec3 C)
|
|||
|
||||
void ps_color_clamp_wrap(inout vec3 C)
|
||||
{
|
||||
#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates accumulation blend under HDR
|
||||
ivec3 color = ivec3(C);
|
||||
#if PS_DFMT == FMT_16
|
||||
color &= 0xF8;
|
||||
#endif
|
||||
// -128 to 127 gives us longer before we run out of float precision
|
||||
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
|
||||
C = vec3((color << 24) >> 24);
|
||||
|
||||
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
|
||||
// so we need to limit the color depth on dithered items
|
||||
#elif SW_BLEND || PS_DITHER || PS_FBMASK
|
||||
#if SW_BLEND || PS_DITHER || PS_FBMASK
|
||||
|
||||
// Correct the Color value based on the output format
|
||||
#if PS_COLCLIP == 0 && PS_HDR == 0
|
||||
|
@ -666,7 +657,7 @@ void ps_color_clamp_wrap(inout vec3 C)
|
|||
#if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0
|
||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||
C = vec3(ivec3(C) & ivec3(0xF8));
|
||||
#elif PS_COLCLIP == 1 && PS_HDR == 0
|
||||
#elif PS_COLCLIP == 1 || PS_HDR == 1
|
||||
C = vec3(ivec3(C) & ivec3(0xFF));
|
||||
#endif
|
||||
|
||||
|
@ -934,8 +925,8 @@ void ps_main()
|
|||
ps_fbmask(C);
|
||||
|
||||
#if !PS_NO_COLOR
|
||||
#if PS_HDR
|
||||
SV_Target0 = vec4(C.rgb, C.a / 255.0f);
|
||||
#if PS_HDR == 1
|
||||
SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
|
||||
#else
|
||||
SV_Target0 = C / 255.0f;
|
||||
#endif
|
||||
|
|
|
@ -94,7 +94,7 @@ void ps_datm0()
|
|||
void ps_hdr_init()
|
||||
{
|
||||
vec4 value = sample_c(v_tex);
|
||||
o_col0 = vec4(roundEven(value.rgb * 255.0f), value.a);
|
||||
o_col0 = vec4(roundEven(value.rgb * 255.0f) / 65535.0f, value.a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -102,7 +102,7 @@ void ps_hdr_init()
|
|||
void ps_hdr_resolve()
|
||||
{
|
||||
vec4 value = sample_c(v_tex);
|
||||
o_col0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a);
|
||||
o_col0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -946,7 +946,7 @@ void ps_fbmask(inout vec4 C)
|
|||
{
|
||||
#if PS_FBMASK
|
||||
vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f);
|
||||
C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask));
|
||||
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -967,18 +967,9 @@ void ps_dither(inout vec3 C)
|
|||
|
||||
void ps_color_clamp_wrap(inout vec3 C)
|
||||
{
|
||||
#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates accumulation blend under HDR
|
||||
ivec3 color = ivec3(C);
|
||||
#if PS_DFMT == FMT_16
|
||||
color &= 0xF8;
|
||||
#endif
|
||||
// -128 to 127 gives us longer before we run out of float precision
|
||||
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
|
||||
C = vec3((color << 24) >> 24);
|
||||
|
||||
// When dithering the bottom 3 bits become meaningless and cause lines in the picture
|
||||
// so we need to limit the color depth on dithered items
|
||||
#elif SW_BLEND || PS_DITHER || PS_FBMASK
|
||||
#if SW_BLEND || PS_DITHER || PS_FBMASK
|
||||
|
||||
// Correct the Color value based on the output format
|
||||
#if PS_COLCLIP == 0 && PS_HDR == 0
|
||||
|
@ -995,7 +986,7 @@ void ps_color_clamp_wrap(inout vec3 C)
|
|||
#if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0
|
||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||
C = vec3(ivec3(C) & ivec3(0xF8));
|
||||
#elif PS_COLCLIP == 1 && PS_HDR == 0
|
||||
#elif PS_COLCLIP == 1 || PS_HDR == 1
|
||||
C = vec3(ivec3(C) & ivec3(0xFF));
|
||||
#endif
|
||||
|
||||
|
@ -1235,8 +1226,8 @@ void main()
|
|||
ps_fbmask(C);
|
||||
|
||||
#if !PS_NO_COLOR
|
||||
#if PS_HDR
|
||||
o_col0 = vec4(C.rgb, C.a / 255.0f);
|
||||
#if PS_HDR == 1
|
||||
o_col0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
|
||||
#else
|
||||
o_col0 = C / 255.0f;
|
||||
#endif
|
||||
|
|
|
@ -84,7 +84,7 @@ u32 GSTexture::GetCompressedBytesPerBlock() const
|
|||
static constexpr u32 bytes_per_block[] = {
|
||||
1, // Invalid
|
||||
4, // Color/RGBA8
|
||||
16, // HDRColor/RGBA32F
|
||||
8, // HDRColor/RGBA16
|
||||
32, // DepthStencil
|
||||
1, // UNorm8/R8
|
||||
2, // UInt16/R16UI
|
||||
|
|
|
@ -39,7 +39,7 @@ public:
|
|||
{
|
||||
Invalid = 0, ///< Used for initialization
|
||||
Color, ///< Standard (RGBA8) color texture
|
||||
HDRColor, ///< Float-based color texture for colclip emulation (RGBA32F)
|
||||
HDRColor, ///< Color texture with more bits for colclip emulation (RGBA16Unorm)
|
||||
DepthStencil, ///< Depth stencil texture
|
||||
UNorm8, ///< A8UNorm texture for paletted textures and the OSD font
|
||||
UInt16, ///< UInt16 texture for reading back 16-bit depth
|
||||
|
|
|
@ -465,7 +465,7 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height
|
|||
switch (format)
|
||||
{
|
||||
case GSTexture::Format::Color: dxformat = DXGI_FORMAT_R8G8B8A8_UNORM; break;
|
||||
case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
|
||||
case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R16G16B16A16_UNORM; break;
|
||||
case GSTexture::Format::DepthStencil: dxformat = DXGI_FORMAT_R32G8X24_TYPELESS; break;
|
||||
case GSTexture::Format::UNorm8: dxformat = DXGI_FORMAT_A8_UNORM; break;
|
||||
case GSTexture::Format::UInt16: dxformat = DXGI_FORMAT_R16_UINT; break;
|
||||
|
|
|
@ -284,7 +284,7 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f
|
|||
static constexpr std::array<std::array<DXGI_FORMAT, 4>, static_cast<int>(GSTexture::Format::BC7) + 1> s_format_mapping = {{
|
||||
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // Invalid
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN}, // Color
|
||||
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN}, // HDRColor
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_UNKNOWN}, // HDRColor
|
||||
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_D32_FLOAT_S8X24_UINT}, // DepthStencil
|
||||
{DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN}, // UNorm8
|
||||
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_UNKNOWN}, // UInt16
|
||||
|
@ -1161,7 +1161,7 @@ bool GSDevice12::CompileConvertPipelines()
|
|||
{
|
||||
pxAssert(!arr[ds]);
|
||||
|
||||
gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM);
|
||||
gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R16G16B16A16_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM);
|
||||
gpb.SetDepthStencilFormat(ds ? DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN);
|
||||
arr[ds] = gpb.Create(g_d3d12_context->GetDevice(), m_shader_cache, false);
|
||||
if (!arr[ds])
|
||||
|
|
|
@ -2767,7 +2767,6 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
|
|||
// A fast algo that requires 2 passes
|
||||
GL_INS("COLCLIP Fast HDR mode ENABLED");
|
||||
m_conf.ps.hdr = 1;
|
||||
m_conf.ps.colclip = accumulation_blend; // reuse as a flag for accumulation blend
|
||||
blend_mix = false;
|
||||
sw_blending = true; // Enable sw blending for the HDR algo
|
||||
}
|
||||
|
@ -2865,13 +2864,23 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
|
|||
m_conf.ps.blend_d = 2;
|
||||
}
|
||||
|
||||
if (m_conf.ps.blend_a == 2)
|
||||
if (blend.op == GSDevice::OP_REV_SUBTRACT)
|
||||
{
|
||||
// The blend unit does a reverse subtraction so it means
|
||||
// the shader must output a positive value.
|
||||
// Replace 0 - Cs by Cs - 0
|
||||
m_conf.ps.blend_a = m_conf.ps.blend_b;
|
||||
m_conf.ps.blend_b = 2;
|
||||
ASSERT(m_conf.ps.blend_a == 2);
|
||||
if (m_conf.ps.hdr)
|
||||
{
|
||||
// HDR uses unorm, which is always positive
|
||||
// Have the shader do the inversion, then clip to remove the negative
|
||||
m_conf.blend.op = GSDevice::OP_ADD;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The blend unit does a reverse subtraction so it means
|
||||
// the shader must output a positive value.
|
||||
// Replace 0 - Cs by Cs - 0
|
||||
m_conf.ps.blend_a = m_conf.ps.blend_b;
|
||||
m_conf.ps.blend_b = 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Dual source output not needed (accumulation blend replaces it with ONE).
|
||||
|
|
|
@ -374,7 +374,7 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format)
|
|||
case GSTexture::Format::UInt16: return MTLPixelFormatR16Uint;
|
||||
case GSTexture::Format::UNorm8: return MTLPixelFormatA8Unorm;
|
||||
case GSTexture::Format::Color: return MTLPixelFormatRGBA8Unorm;
|
||||
case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA32Float;
|
||||
case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA16Unorm;
|
||||
case GSTexture::Format::DepthStencil: return MTLPixelFormatDepth32Float_Stencil8;
|
||||
case GSTexture::Format::Invalid: return MTLPixelFormatInvalid;
|
||||
case GSTexture::Format::BC1: return MTLPixelFormatBC1_RGBA;
|
||||
|
|
|
@ -111,16 +111,16 @@ fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn<
|
|||
return tex.read(p).a < (127.5f / 255.f) ? -1 : FLT_MAX;
|
||||
}
|
||||
|
||||
fragment half4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<half> tex)
|
||||
fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex)
|
||||
{
|
||||
half4 in = tex.read(p);
|
||||
return half4(round(in.rgb * 255.h), in.a);
|
||||
float4 in = tex.read(p);
|
||||
return float4(round(in.rgb * 255.f) / 65535.f, in.a);
|
||||
}
|
||||
|
||||
fragment float4 ps_hdr_resolve(float4 p [[position]], DirectReadTextureIn<float> tex)
|
||||
{
|
||||
float4 in = tex.read(p);
|
||||
return float4(float3(int3(in.rgb) & 255) / 255.f, in.a);
|
||||
return float4(float3(uint3(in.rgb * 65535.5f) & 255) / 255.f, in.a);
|
||||
}
|
||||
|
||||
fragment float4 ps_filter_transparency(ConvertShaderData data [[stage_in]], ConvertPSRes res)
|
||||
|
|
|
@ -784,17 +784,6 @@ struct PSMain
|
|||
|
||||
void ps_color_clamp_wrap(thread float4& C)
|
||||
{
|
||||
if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR
|
||||
{
|
||||
int3 color = int3(C.rgb);
|
||||
if (PS_DFMT == FMT_16)
|
||||
color &= 0xF8;
|
||||
// -128 to 127 gives us longer before we run out of float precision
|
||||
// Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1
|
||||
C.rgb = float3(char3(color));
|
||||
return;
|
||||
}
|
||||
|
||||
// When dithering the bottom 3 bits become meaningless and cause lines in the picture so we need to limit the color depth on dithered items
|
||||
if (!SW_BLEND && !PS_DITHER && !PS_FBMASK)
|
||||
return;
|
||||
|
@ -812,7 +801,7 @@ struct PSMain
|
|||
if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
|
||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||
C.rgb = float3(short3(C.rgb) & 0xF8);
|
||||
else if (PS_COLCLIP && !PS_HDR)
|
||||
else if (PS_COLCLIP || PS_HDR)
|
||||
C.rgb = float3(short3(C.rgb) & 0xFF);
|
||||
}
|
||||
|
||||
|
@ -989,7 +978,7 @@ struct PSMain
|
|||
ps_fbmask(C);
|
||||
|
||||
if (PS_COLOR0)
|
||||
out.c0 = PS_HDR ? float4(C.rgb, C.a / 255.f) : C / 255.f;
|
||||
out.c0 = PS_HDR ? float4(C.rgb / 65535.f, C.a / 255.f) : C / 255.f;
|
||||
if (PS_COLOR0 && PS_ONLY_ALPHA)
|
||||
out.c0.rgb = 0;
|
||||
if (PS_COLOR1)
|
||||
|
|
|
@ -225,10 +225,10 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
|
|||
|
||||
// 4 channel HDR color (16-bit unorm; previously 32-bit float)
|
||||
case Format::HDRColor:
|
||||
gl_fmt = GL_RGBA32F;
|
||||
gl_fmt = GL_RGBA16;
|
||||
m_int_format = GL_RGBA;
|
||||
m_int_type = GL_FLOAT;
|
||||
m_int_shift = 4;
|
||||
m_int_type = GL_UNSIGNED_SHORT;
|
||||
m_int_shift = 3;
|
||||
break;
|
||||
|
||||
// Depth buffer
|
||||
|
|
|
@ -378,7 +378,7 @@ VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const
|
|||
static constexpr std::array<VkFormat, static_cast<int>(GSTexture::Format::BC7) + 1> s_format_mapping = {{
|
||||
VK_FORMAT_UNDEFINED, // Invalid
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Color
|
||||
VK_FORMAT_R32G32B32A32_SFLOAT, // HDRColor
|
||||
VK_FORMAT_R16G16B16A16_UNORM, // HDRColor
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT, // DepthStencil
|
||||
VK_FORMAT_R8_UNORM, // UNorm8
|
||||
VK_FORMAT_R16_UINT, // UInt16
|
||||
|
|
Loading…
Reference in New Issue