diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx index 0134560798..9d5870b0d3 100644 --- a/bin/resources/shaders/dx11/convert.fx +++ b/bin/resources/shaders/dx11/convert.fx @@ -125,7 +125,7 @@ PS_OUTPUT ps_hdr_init(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); - output.c = float4(round(value.rgb * 255), value.a); + output.c = float4(round(value.rgb * 255) / 65535, value.a); return output; } @@ -133,7 +133,7 @@ PS_OUTPUT ps_hdr_resolve(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); - output.c = float4(float3(int3(value.rgb) & 255) / 255, value.a); + output.c = float4(float3(uint3(value.rgb * 65535.5) & 255) / 255, value.a); return output; } diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index b792e7e7dd..28b1e8a53e 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -715,7 +715,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy) if (PS_FBMASK) { float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f); - C = (float4)(((uint4)(int4)C & (FbMask ^ 0xFF)) | ((uint4)RT & FbMask)); + C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask)); } } @@ -736,18 +736,9 @@ void ps_dither(inout float3 C, float2 pos_xy) void ps_color_clamp_wrap(inout float3 C) { - if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR - { - int3 color = int3(C); - if (PS_DFMT == FMT_16) - color &= (int3)0xF8; - // -128 to 127 gives us longer before we run out of float precision - // Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1 - C = float3((color << 24) >> 24); - } // When dithering the bottom 3 bits become meaningless and cause lines in the picture // so we need to limit the color depth on dithered items - else if (SW_BLEND || PS_DITHER || PS_FBMASK) + if (SW_BLEND || PS_DITHER || PS_FBMASK) { // Standard Clamp if (PS_COLCLIP == 0 && PS_HDR == 0) @@ -756,7 +747,7 @@ 
void ps_color_clamp_wrap(inout float3 C) // In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0) C = (float3)((int3)C & (int3)0xF8); - else if (PS_COLCLIP == 1 && PS_HDR == 0) + else if (PS_COLCLIP == 1 || PS_HDR == 1) C = (float3)((int3)C & (int3)0xFF); } } @@ -952,7 +943,7 @@ PS_OUTPUT ps_main(PS_INPUT input) ps_fbmask(C, input.p.xy); #if !PS_NO_COLOR - output.c0 = PS_HDR ? float4(C.rgb, C.a / 255.0f) : C / 255.0f; + output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f; #if !PS_NO_COLOR1 output.c1 = (float4)(alpha_blend); #endif diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl index 8900d9f693..710bee3a40 100644 --- a/bin/resources/shaders/opengl/convert.glsl +++ b/bin/resources/shaders/opengl/convert.glsl @@ -322,7 +322,7 @@ void ps_datm0() void ps_hdr_init() { vec4 value = sample_c(); - SV_Target0 = vec4(round(value.rgb * 255.0f), value.a); + SV_Target0 = vec4(round(value.rgb * 255.0f) / 65535.0f, value.a); } #endif @@ -330,7 +330,7 @@ void ps_hdr_init() void ps_hdr_resolve() { vec4 value = sample_c(); - SV_Target0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a); + SV_Target0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a); } #endif diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 4546cea1f8..599010dd57 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -620,7 +620,7 @@ void ps_fbmask(inout vec4 C) // FIXME do I need special case for 16 bits #if PS_FBMASK vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f); - C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask)); + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); #endif } @@ -638,18 +638,9 @@ void ps_dither(inout vec3 C) void ps_color_clamp_wrap(inout vec3 C) { -#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates
accumulation blend under HDR - ivec3 color = ivec3(C); -#if PS_DFMT == FMT_16 - color &= 0xF8; -#endif - // -128 to 127 gives us longer before we run out of float precision - // Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1 - C = vec3((color << 24) >> 24); - // When dithering the bottom 3 bits become meaningless and cause lines in the picture // so we need to limit the color depth on dithered items -#elif SW_BLEND || PS_DITHER || PS_FBMASK +#if SW_BLEND || PS_DITHER || PS_FBMASK // Correct the Color value based on the output format #if PS_COLCLIP == 0 && PS_HDR == 0 @@ -666,7 +657,7 @@ void ps_color_clamp_wrap(inout vec3 C) #if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0 // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania C = vec3(ivec3(C) & ivec3(0xF8)); -#elif PS_COLCLIP == 1 && PS_HDR == 0 +#elif PS_COLCLIP == 1 || PS_HDR == 1 C = vec3(ivec3(C) & ivec3(0xFF)); #endif @@ -934,8 +925,8 @@ void ps_main() ps_fbmask(C); #if !PS_NO_COLOR -#if PS_HDR - SV_Target0 = vec4(C.rgb, C.a / 255.0f); +#if PS_HDR == 1 + SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f); #else SV_Target0 = C / 255.0f; #endif diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl index fe23bab1a4..e23190313f 100644 --- a/bin/resources/shaders/vulkan/convert.glsl +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -94,7 +94,7 @@ void ps_datm0() void ps_hdr_init() { vec4 value = sample_c(v_tex); - o_col0 = vec4(roundEven(value.rgb * 255.0f), value.a); + o_col0 = vec4(roundEven(value.rgb * 255.0f) / 65535.0f, value.a); } #endif @@ -102,7 +102,7 @@ void ps_hdr_init() void ps_hdr_resolve() { vec4 value = sample_c(v_tex); - o_col0 = vec4(vec3(ivec3(value.rgb) & 255) / 255.0f, value.a); + o_col0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a); } #endif diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 
6a02a556aa..8ca8068aeb 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -946,7 +946,7 @@ void ps_fbmask(inout vec4 C) { #if PS_FBMASK vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); - C = vec4((uvec4(ivec4(C)) & (FbMask ^ 0xFFu)) | (uvec4(RT) & FbMask)); + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); #endif } @@ -967,18 +967,9 @@ void ps_dither(inout vec3 C) void ps_color_clamp_wrap(inout vec3 C) { -#if PS_HDR && PS_COLCLIP // COLCLIP flag indicates accumulation blend under HDR - ivec3 color = ivec3(C); -#if PS_DFMT == FMT_16 - color &= 0xF8; -#endif - // -128 to 127 gives us longer before we run out of float precision - // Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1 - C = vec3((color << 24) >> 24); - // When dithering the bottom 3 bits become meaningless and cause lines in the picture // so we need to limit the color depth on dithered items -#elif SW_BLEND || PS_DITHER || PS_FBMASK +#if SW_BLEND || PS_DITHER || PS_FBMASK // Correct the Color value based on the output format #if PS_COLCLIP == 0 && PS_HDR == 0 @@ -995,7 +986,7 @@ void ps_color_clamp_wrap(inout vec3 C) #if PS_DFMT == FMT_16 && PS_BLEND_MIX == 0 // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania C = vec3(ivec3(C) & ivec3(0xF8)); -#elif PS_COLCLIP == 1 && PS_HDR == 0 +#elif PS_COLCLIP == 1 || PS_HDR == 1 C = vec3(ivec3(C) & ivec3(0xFF)); #endif @@ -1235,8 +1226,8 @@ void main() ps_fbmask(C); #if !PS_NO_COLOR -#if PS_HDR - o_col0 = vec4(C.rgb, C.a / 255.0f); +#if PS_HDR == 1 + o_col0 = vec4(C.rgb / 65535.0f, C.a / 255.0f); #else o_col0 = C / 255.0f; #endif diff --git a/pcsx2/GS/Renderers/Common/GSTexture.cpp b/pcsx2/GS/Renderers/Common/GSTexture.cpp index e2b3652c73..41e0075c54 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.cpp +++ b/pcsx2/GS/Renderers/Common/GSTexture.cpp @@ -84,7 +84,7 @@ u32 GSTexture::GetCompressedBytesPerBlock() const static constexpr u32 bytes_per_block[] = { 1, // Invalid 4, // Color/RGBA8 - 16, // HDRColor/RGBA32F + 8, // HDRColor/RGBA16 32, // DepthStencil 1, // UNorm8/R8 2, // UInt16/R16UI diff --git a/pcsx2/GS/Renderers/Common/GSTexture.h b/pcsx2/GS/Renderers/Common/GSTexture.h index ee363fc738..6b22539d49 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.h +++ b/pcsx2/GS/Renderers/Common/GSTexture.h @@ -39,7 +39,7 @@ public: { Invalid = 0, ///< Used for initialization Color, ///< Standard (RGBA8) color texture - HDRColor, ///< Float-based color texture for colclip emulation (RGBA32F) + HDRColor, ///< Color texture with more bits for colclip emulation (RGBA16Unorm) DepthStencil, ///< Depth stencil texture UNorm8, ///< A8UNorm texture for paletted textures and the OSD font UInt16, ///< UInt16 texture for reading back 16-bit depth diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index bb8cf10f0f..fdbfe51775 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -465,7 +465,7 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height switch (format) { case GSTexture::Format::Color: dxformat = DXGI_FORMAT_R8G8B8A8_UNORM; break; - case GSTexture::Format::HDRColor: dxformat = 
DXGI_FORMAT_R32G32B32A32_FLOAT; break; + case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R16G16B16A16_UNORM; break; case GSTexture::Format::DepthStencil: dxformat = DXGI_FORMAT_R32G8X24_TYPELESS; break; case GSTexture::Format::UNorm8: dxformat = DXGI_FORMAT_A8_UNORM; break; case GSTexture::Format::UInt16: dxformat = DXGI_FORMAT_R16_UINT; break; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 0d1eb62934..2893b5dd65 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -284,7 +284,7 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f static constexpr std::array, static_cast(GSTexture::Format::BC7) + 1> s_format_mapping = {{ {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // Invalid {DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN}, // Color - {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN}, // HDRColor + {DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_UNKNOWN}, // HDRColor {DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_D32_FLOAT_S8X24_UINT}, // DepthStencil {DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN}, // UNorm8 {DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_UNKNOWN}, // UInt16 @@ -1161,7 +1161,7 @@ bool GSDevice12::CompileConvertPipelines() { pxAssert(!arr[ds]); - gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R16G16B16A16_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM); gpb.SetDepthStencilFormat(ds ? 
DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN); arr[ds] = gpb.Create(g_d3d12_context->GetDevice(), m_shader_cache, false); if (!arr[ds]) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 3db87c2f24..18064fa069 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2767,7 +2767,6 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& // A fast algo that requires 2 passes GL_INS("COLCLIP Fast HDR mode ENABLED"); m_conf.ps.hdr = 1; - m_conf.ps.colclip = accumulation_blend; // reuse as a flag for accumulation blend blend_mix = false; sw_blending = true; // Enable sw blending for the HDR algo } @@ -2865,13 +2864,23 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool& m_conf.ps.blend_d = 2; } - if (m_conf.ps.blend_a == 2) + if (blend.op == GSDevice::OP_REV_SUBTRACT) { - // The blend unit does a reverse subtraction so it means - // the shader must output a positive value. - // Replace 0 - Cs by Cs - 0 - m_conf.ps.blend_a = m_conf.ps.blend_b; - m_conf.ps.blend_b = 2; + ASSERT(m_conf.ps.blend_a == 2); + if (m_conf.ps.hdr) + { + // HDR uses unorm, which is always positive + // Have the shader do the inversion, then clip to remove the negative + m_conf.blend.op = GSDevice::OP_ADD; + } + else + { + // The blend unit does a reverse subtraction so it means + // the shader must output a positive value. + // Replace 0 - Cs by Cs - 0 + m_conf.ps.blend_a = m_conf.ps.blend_b; + m_conf.ps.blend_b = 2; + } } // Dual source output not needed (accumulation blend replaces it with ONE). 
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index fa759986fc..69198e70c5 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -374,7 +374,7 @@ static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format) case GSTexture::Format::UInt16: return MTLPixelFormatR16Uint; case GSTexture::Format::UNorm8: return MTLPixelFormatA8Unorm; case GSTexture::Format::Color: return MTLPixelFormatRGBA8Unorm; - case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA32Float; + case GSTexture::Format::HDRColor: return MTLPixelFormatRGBA16Unorm; case GSTexture::Format::DepthStencil: return MTLPixelFormatDepth32Float_Stencil8; case GSTexture::Format::Invalid: return MTLPixelFormatInvalid; case GSTexture::Format::BC1: return MTLPixelFormatBC1_RGBA; diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal index e6b58137c4..e4eedc5ddd 100644 --- a/pcsx2/GS/Renderers/Metal/convert.metal +++ b/pcsx2/GS/Renderers/Metal/convert.metal @@ -111,16 +111,16 @@ fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn< return tex.read(p).a < (127.5f / 255.f) ? 
-1 : FLT_MAX; } -fragment half4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn tex) +fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn tex) { - half4 in = tex.read(p); - return half4(round(in.rgb * 255.h), in.a); + float4 in = tex.read(p); + return float4(round(in.rgb * 255.f) / 65535.f, in.a); } fragment float4 ps_hdr_resolve(float4 p [[position]], DirectReadTextureIn tex) { float4 in = tex.read(p); - return float4(float3(int3(in.rgb) & 255) / 255.f, in.a); + return float4(float3(uint3(in.rgb * 65535.5f) & 255) / 255.f, in.a); } fragment float4 ps_filter_transparency(ConvertShaderData data [[stage_in]], ConvertPSRes res) diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 821cbeba11..57c13faa15 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -784,17 +784,6 @@ struct PSMain void ps_color_clamp_wrap(thread float4& C) { - if (PS_HDR && PS_COLCLIP) // COLCLIP flag indicates accumulation blend under HDR - { - int3 color = int3(C.rgb); - if (PS_DFMT == FMT_16) - color &= 0xF8; - // -128 to 127 gives us longer before we run out of float precision - // Especially for games that mainly use 1 and 255 (sly), since that maps to 1 and -1 - C.rgb = float3(char3(color)); - return; - } - // When dithering the bottom 3 bits become meaningless and cause lines in the picture so we need to limit the color depth on dithered items if (!SW_BLEND && !PS_DITHER && !PS_FBMASK) return; @@ -812,7 +801,7 @@ struct PSMain if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0) // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania C.rgb = float3(short3(C.rgb) & 0xF8); - else if (PS_COLCLIP && !PS_HDR) + else if (PS_COLCLIP || PS_HDR) C.rgb = float3(short3(C.rgb) & 0xFF); } @@ -989,7 +978,7 @@ struct PSMain ps_fbmask(C); if (PS_COLOR0) - out.c0 = PS_HDR ? float4(C.rgb, C.a / 255.f) : C / 255.f; + out.c0 = PS_HDR ? 
float4(C.rgb / 65535.f, C.a / 255.f) : C / 255.f; if (PS_COLOR0 && PS_ONLY_ALPHA) out.c0.rgb = 0; if (PS_COLOR1) diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index ee4aaeb2b3..616a00c353 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -225,10 +225,10 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format // 4 channel float case Format::HDRColor: - gl_fmt = GL_RGBA32F; + gl_fmt = GL_RGBA16; m_int_format = GL_RGBA; - m_int_type = GL_FLOAT; - m_int_shift = 4; + m_int_type = GL_UNSIGNED_SHORT; + m_int_shift = 3; break; // Depth buffer diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 82ace898e7..5f999e90e5 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -378,7 +378,7 @@ VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const static constexpr std::array(GSTexture::Format::BC7) + 1> s_format_mapping = {{ VK_FORMAT_UNDEFINED, // Invalid VK_FORMAT_R8G8B8A8_UNORM, // Color - VK_FORMAT_R32G32B32A32_SFLOAT, // HDRColor + VK_FORMAT_R16G16B16A16_UNORM, // HDRColor VK_FORMAT_D32_SFLOAT_S8_UINT, // DepthStencil VK_FORMAT_R8_UNORM, // UNorm8 VK_FORMAT_R16_UINT, // UInt16