From 10e8c486d89c57e2eaaa314fbf0aee27c8bbffc6 Mon Sep 17 00:00:00 2001 From: Filoppi Date: Sat, 19 Apr 2025 20:53:16 +0300 Subject: [PATCH] WIP: HDR --- bin/resources/shaders/common/fxaa.fx | 78 ++--- bin/resources/shaders/dx11/colorcorrect.fx | 176 ++++++++++++ bin/resources/shaders/dx11/convert.fx | 46 ++- bin/resources/shaders/dx11/imgui.fx | 12 +- bin/resources/shaders/dx11/present.fx | 232 +++++++++++++-- bin/resources/shaders/dx11/shadeboost.fx | 52 ---- bin/resources/shaders/dx11/tfx.fx | 270 ++++++++++++++---- .../shaders/opengl/colorcorrect.glsl | 105 +++++++ bin/resources/shaders/opengl/convert.glsl | 28 +- bin/resources/shaders/opengl/present.glsl | 14 +- bin/resources/shaders/opengl/shadeboost.glsl | 60 ---- bin/resources/shaders/opengl/tfx_fs.glsl | 4 +- .../shaders/vulkan/colorcorrect.glsl | 186 ++++++++++++ bin/resources/shaders/vulkan/convert.glsl | 56 +++- bin/resources/shaders/vulkan/imgui.glsl | 23 +- bin/resources/shaders/vulkan/present.glsl | 52 +++- bin/resources/shaders/vulkan/shadeboost.glsl | 74 ----- bin/resources/shaders/vulkan/tfx.glsl | 211 +++++++++++--- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 40 +++ pcsx2-qt/Settings/GraphicsSettingsWidget.h | 2 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 157 +++++++++- pcsx2/Config.h | 25 ++ pcsx2/GS/GS.cpp | 18 ++ pcsx2/GS/GSCapture.cpp | 3 +- pcsx2/GS/GSCapture.h | 7 +- pcsx2/GS/GSClut.cpp | 2 +- pcsx2/GS/GSClut.h | 2 +- pcsx2/GS/Renderers/Common/GSDevice.cpp | 57 ++-- pcsx2/GS/Renderers/Common/GSDevice.h | 42 ++- pcsx2/GS/Renderers/Common/GSRenderer.cpp | 13 +- pcsx2/GS/Renderers/Common/GSTexture.cpp | 7 +- pcsx2/GS/Renderers/DX11/D3D.cpp | 1 + pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp | 2 + pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 182 ++++++++++-- pcsx2/GS/Renderers/DX11/GSDevice11.h | 6 +- pcsx2/GS/Renderers/DX11/GSTexture11.cpp | 30 +- pcsx2/GS/Renderers/DX11/GSTexture11.h | 2 +- pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 183 +++++++++--- pcsx2/GS/Renderers/DX12/GSDevice12.h | 17 +- 
pcsx2/GS/Renderers/DX12/GSTexture12.cpp | 21 ++ pcsx2/GS/Renderers/HW/GSHwHack.cpp | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 24 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 66 +++-- .../HW/GSTextureReplacementLoaders.cpp | 7 + .../GS/Renderers/HW/GSTextureReplacements.cpp | 2 +- pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 50 +++- pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h | 3 +- pcsx2/GS/Renderers/Metal/present.metal | 12 +- pcsx2/GS/Renderers/Metal/tfx.metal | 1 + pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 35 +-- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h | 6 +- pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp | 1 + pcsx2/GS/Renderers/SW/GSRendererSW.cpp | 3 +- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 210 +++++++++++--- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 10 +- pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp | 2 +- pcsx2/GS/Renderers/Vulkan/VKSwapChain.cpp | 52 +++- pcsx2/ImGui/FullscreenUI.cpp | 26 +- pcsx2/Pcsx2Config.cpp | 17 ++ pcsx2/ShaderCacheVersion.h | 2 +- pcsx2/VMManager.cpp | 5 + pcsx2/pcsx2.vcxproj | 8 +- pcsx2/pcsx2.vcxproj.filters | 20 +- 63 files changed, 2443 insertions(+), 619 deletions(-) create mode 100644 bin/resources/shaders/dx11/colorcorrect.fx delete mode 100644 bin/resources/shaders/dx11/shadeboost.fx create mode 100644 bin/resources/shaders/opengl/colorcorrect.glsl delete mode 100644 bin/resources/shaders/opengl/shadeboost.glsl create mode 100644 bin/resources/shaders/vulkan/colorcorrect.glsl delete mode 100644 bin/resources/shaders/vulkan/shadeboost.glsl diff --git a/bin/resources/shaders/common/fxaa.fx b/bin/resources/shaders/common/fxaa.fx index 5b01f48efa..baaf7cac63 100644 --- a/bin/resources/shaders/common/fxaa.fx +++ b/bin/resources/shaders/common/fxaa.fx @@ -11,6 +11,10 @@ #define FXAA_GLSL_VK 0 #endif +#ifndef PS_HDR + #define PS_HDR 0 +#endif + #define UHQ_FXAA 1 //High Quality Fast Approximate Anti Aliasing. Adapted for GS from Timothy Lottes FXAA 3.11. 
#define FxaaSubpixMax 0.0 //[0.00 to 1.00] Amount of subpixel aliasing removal. 0.00: Edge only antialiasing (no blurring) #define FxaaEarlyExit 1 //[0 or 1] Use Fxaa early exit pathing. When disabled, the entire scene is antialiased(FSAA). 0 is off, 1 is on. @@ -102,47 +106,30 @@ struct FxaaTex { SamplerState smpl; Texture2D tex; }; #define FXAA_QUALITY_P11 8.0 #define FXAA_QUALITY_P12 8.0 +#define DEFAULT_GAMMA 2.2 + /*------------------------------------------------------------------------------ [GAMMA PREPASS CODE SECTION] ------------------------------------------------------------------------------*/ float RGBLuminance(float3 color) { - const float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750); + const float3 lumCoeff = float3(0.2126, 0.7152, 0.0722); return dot(color.rgb, lumCoeff); } -float3 RGBGammaToLinear(float3 color, float gamma) -{ - color = FxaaSat(color); - color.r = (color.r <= 0.0404482362771082) ? - color.r / 12.92 : pow((color.r + 0.055) / 1.055, gamma); - color.g = (color.g <= 0.0404482362771082) ? - color.g / 12.92 : pow((color.g + 0.055) / 1.055, gamma); - color.b = (color.b <= 0.0404482362771082) ? - color.b / 12.92 : pow((color.b + 0.055) / 1.055, gamma); - - return color; -} - -float3 LinearToRGBGamma(float3 color, float gamma) -{ - color = FxaaSat(color); - color.r = (color.r <= 0.00313066844250063) ? - color.r * 12.92 : 1.055 * pow(color.r, 1.0 / gamma) - 0.055; - color.g = (color.g <= 0.00313066844250063) ? - color.g * 12.92 : 1.055 * pow(color.g, 1.0 / gamma) - 0.055; - color.b = (color.b <= 0.00313066844250063) ? - color.b * 12.92 : 1.055 * pow(color.b, 1.0 / gamma) - 0.055; - - return color; -} - float4 PreGammaPass(float4 color) { - const float GammaConst = 2.233; - color.rgb = RGBGammaToLinear(color.rgb, GammaConst); - color.rgb = LinearToRGBGamma(color.rgb, GammaConst); +#if !PS_HDR + // PS2 games didn't expect sRGB decoding from the display (which is different than raw gamma 2.2). + // HD TVs are all either 2.4 or 2.2. 
Most monitors are 2.2, not sRGB. + color.rgb = pow(abs(color.rgb), float3(DEFAULT_GAMMA, DEFAULT_GAMMA, DEFAULT_GAMMA)) * sign(color.rgb); +#endif + + // Calculate the luminance in linear space color.a = RGBLuminance(color.rgb); + + // Convert back to gamma space as FXAA expects it + color.rgb = pow(abs(color.rgb), float3(1.0 / DEFAULT_GAMMA, 1.0 / DEFAULT_GAMMA, 1.0 / DEFAULT_GAMMA)) * sign(color.rgb); return color; } @@ -153,21 +140,36 @@ float4 PreGammaPass(float4 color) ------------------------------------------------------------------------------*/ float FxaaLuma(float4 rgba) -{ +{ rgba.w = RGBLuminance(rgba.xyz); +#if PS_HDR + // In HDR, the source color was linear, so given that calculating luminance + // in linear space is better (more accurate), do so and then apply gamma to the luminance + rgba.w = pow(max(rgba.w, 0.0), 1.0 / DEFAULT_GAMMA); +#endif return rgba.w; } +float4 FxaaEncode(float4 rgba) +{ +#if PS_HDR + // Convert from linear to gamma space as FXAA expects it + rgba.rgb = pow(abs(rgba.rgb), float3(1.0 / DEFAULT_GAMMA, 1.0 / DEFAULT_GAMMA, 1.0 / DEFAULT_GAMMA)) * sign(rgba.rgb); +#endif + return rgba; +} + float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaSubpix, float fxaaEdgeThreshold, float fxaaEdgeThresholdMin) { float2 posM; posM.x = pos.x; posM.y = pos.y; - float4 rgbyM = FxaaTexTop(tex, posM); - rgbyM.w = RGBLuminance(rgbyM.xyz); + float4 rgbyM = FxaaEncode(FxaaTexTop(tex, posM)); + rgbyM.w = FxaaLuma(rgbyM); + rgbyM = FxaaEncode(rgbyM); #define lumaM rgbyM.w - + float lumaS = FxaaLuma(FxaaTexOff(tex, posM, int2( 0, 1), fxaaRcpFrame.xy)); float lumaE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 0), fxaaRcpFrame.xy)); float lumaN = FxaaLuma(FxaaTexOff(tex, posM, int2( 0,-1), fxaaRcpFrame.xy)); @@ -435,7 +437,7 @@ float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaS if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign; if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign; - return 
float4(FxaaTexTop(tex, posM).xyz, lumaM); + return float4(FxaaEncode(FxaaTexTop(tex, posM)).xyz, lumaM); } #if (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1) @@ -477,6 +479,9 @@ void main() vec4 color = texture(TextureSampler, PSin_t); color = PreGammaPass(color); color = FxaaPass(color, PSin_t); +#if PS_HDR + color.rgb = pow(abs(color.rgb), vec3(DEFAULT_GAMMA, DEFAULT_GAMMA, DEFAULT_GAMMA)) * sign(color.rgb); +#endif SV_Target0 = float4(color.rgb, 1.0); } @@ -490,6 +495,9 @@ PS_OUTPUT main(VS_OUTPUT input) color = PreGammaPass(color); color = FxaaPass(color, input.t); +#if PS_HDR + color.rgb = pow(abs(color.rgb), DEFAULT_GAMMA) * sign(color.rgb); +#endif output.c = float4(color.rgb, 1.0); diff --git a/bin/resources/shaders/dx11/colorcorrect.fx b/bin/resources/shaders/dx11/colorcorrect.fx new file mode 100644 index 0000000000..f9fe8a3ae4 --- /dev/null +++ b/bin/resources/shaders/dx11/colorcorrect.fx @@ -0,0 +1,176 @@ +// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#ifndef PS_HDR_INPUT +#define PS_HDR_INPUT 0 +#endif +#ifndef PS_HDR_OUTPUT +#define PS_HDR_OUTPUT 0 +#endif + +Texture2D Texture; +SamplerState Sampler; + +cbuffer cb0 : register(b0) +{ + float4 correction; + float4 adjustment; +}; + +// SMPTE 170M - BT.601 (NTSC-M) -> BT.709 +static const float3x3 from_NTSCM = float3x3( + 0.939497225737661, 0.0502268452914346, 0.0102759289709032, + 0.0177558637510127, 0.965824605885027, 0.0164195303639603, + -0.00162163209967010, -0.00437400622653655, 1.00599563832621); + +// ARIB TR-B9 (9300K+27MPCD with chromatic adaptation) (NTSC-J) -> BT.709 +static const float3x3 from_NTSCJ = float3x3( + 0.823613036967492, -0.0943227111084757, 0.00799341532931119, + 0.0289258355537324, 1.02310733489462, 0.00243547111576797, + -0.00569501554980891, 0.0161828357559315, 1.22328453915712); + +// EBU - BT.470BG/BT.601 (PAL) -> BT.709 +static const float3x3 from_PAL = float3x3( + 1.04408168421813, -0.0440816842181253, 0.000000000000000, + 
0.000000000000000, 1.00000000000000, 0.000000000000000, + 0.000000000000000, 0.0118044782106489, 0.988195521789351); + +// Applies exponential ("Photographic") luminance/luma compression. +float RangeCompress(float X) +{ + // Branches are for static parameters optimizations + // This does e^X. We expect X to be between 0 and 1. + return 1.f - exp(-X); +} + +// Refurbished DICE HDR tonemapper (per channel or luminance). +float LuminanceCompress( + float InValue, + float OutMaxValue, + float ShoulderStart = 0.f) +{ + const float compressableValue = InValue - ShoulderStart; + const float compressedRange = OutMaxValue - ShoulderStart; + const float possibleOutValue = ShoulderStart + compressedRange * RangeCompress(compressableValue / compressedRange); + return (InValue <= ShoulderStart) ? InValue : possibleOutValue; +} + +/* +** Contrast, saturation, brightness +** Code of this function is from TGM's shader pack +** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 +*/ + +// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% +float4 ContrastSaturationBrightness(float4 color) // Ported to HLSL +{ + float brt = adjustment.x; + float con = adjustment.y; + float sat = adjustment.z; + + const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721); + +#if 1 // For linear space in/out + float3 AvgLumin = 0.18; // Mid gray +#else + // Increase or decrease these values to adjust r, g and b color channels separately + const float AvgLumR = 0.5; + const float AvgLumG = 0.5; + const float AvgLumB = 0.5; + float3 AvgLumin = float3(AvgLumR, AvgLumG, AvgLumB); +#endif + float3 brtColor = color.rgb * brt; + float intensity = dot(brtColor, LumCoeff); + float3 satColor = lerp(intensity, brtColor, sat); + float3 conColor = lerp(AvgLumin, satColor, con); + + color.rgb = conColor; + return color; +} + +struct PS_INPUT +{ + float4 p : SV_Position; + float2 t : TEXCOORD0; +}; + +// AdvancedAutoHDR pass to generate some HDR brightness out of an SDR signal. 
+// This is hue conserving and only really affects highlights. +// "SDRColor" is meant to be in "SDR range" (linear), as in, a value of 1 matching SDR white (something between 80, 100, 203, 300 nits, or whatever else) +// https://github.com/Filoppi/PumboAutoHDR +float3 PumboAutoHDR(float3 SDRColor, float PeakWhiteNits = 400.f, float PaperWhiteNits = 203.f, float ShoulderPow = 3.5f) +{ + const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721); + const float SDRRatio = dot(SDRColor, LumCoeff); + // Limit AutoHDR brightness, it won't look good beyond a certain level. + // The paper white multiplier is applied later so we account for that. + const float AutoHDRMaxWhite = max(min(PeakWhiteNits, 1000.f) / PaperWhiteNits, 1.f); + const float AutoHDRShoulderRatio = saturate(SDRRatio); + const float AutoHDRExtraRatio = pow(max(AutoHDRShoulderRatio, 0.f), ShoulderPow) * (AutoHDRMaxWhite - 1.f); + const float AutoHDRTotalRatio = SDRRatio + AutoHDRExtraRatio; + return SDRColor * (SDRRatio > 0.f ? (AutoHDRTotalRatio / SDRRatio) : 1.f); +} + +float4 ps_main(PS_INPUT input) : SV_Target0 +{ + float4 c = Texture.Sample(Sampler, input.t); + +#if PS_HDR_INPUT + // Tonemap in gamma space (this specific formula looks better with it) and by channel, to best retain the original color hues. + // Theoretically tonemapping should be done in the color space of the output display (e.g. BT.2020 in HDR and BT.709 in SDR), + // because displays usually clip individual rgb values to the peak brightness value of HDR, + // but for simplicity, we do it in the raw game color space. + + // In HDR, we only compress the range above SDR (1), in SDR, we compress the top 20% range, to avoid clipping and retain HDR detail. 
+ float shoulderStart = 1.f; +#if !PS_HDR_OUTPUT + shoulderStart = 0.8f; +#endif + + float peakWhite = correction.w; + + c.r = LuminanceCompress(c.r, peakWhite, shoulderStart); + c.g = LuminanceCompress(c.g, peakWhite, shoulderStart); + c.b = LuminanceCompress(c.b, peakWhite, shoulderStart); +#endif + + // Linearize + c.rgb = pow(abs(c.rgb), correction.x) * sign(c.rgb); + +#if PS_HDR_OUTPUT && 0 // Print HDR colors + if (any(c.rgb > 1.0)) + { + c.rgb = float3(1, 0, 1); + } +#endif + + // Convert to BT.709 from the user specified game color space + if (correction.y == 1.f) + { + c.rgb = mul(c.rgb, from_NTSCM); + } + else if (correction.y == 2.f) + { + c.rgb = mul(c.rgb, from_NTSCJ); + } + else if (correction.y == 3.f) + { + c.rgb = mul(c.rgb, from_PAL); + } + +#if PS_HDR_OUTPUT && !PS_HDR_INPUT && 0 // AutoHDR + float HDRPaperWhite = correction.z; + c.rgb = PumboAutoHDR(c.rgb, 750.0, HDRPaperWhite * 80.0); +#endif + + c = ContrastSaturationBrightness(c); + +#if PS_HDR_OUTPUT + // Leave as linear, for scRGB HDR +#else + // Convert to Gamma 2.2 (not sRGB) + c.rgb = pow(max(c.rgb, 0.0), 1.0 / 2.2); +#endif + + return c; +} diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx index 9771e62d4c..18e05e9cee 100644 --- a/bin/resources/shaders/dx11/convert.fx +++ b/bin/resources/shaders/dx11/convert.fx @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ +#ifndef PS_HDR +#define PS_HDR 0 +#endif + struct VS_INPUT { float4 p : POSITION; @@ -105,7 +109,7 @@ PS_OUTPUT ps_filter_transparency(PS_INPUT input) // Need to be careful with precision here, it can break games like Spider-Man 3 and Dogs Life uint ps_convert_rgba8_16bits(PS_INPUT input) : SV_Target0 { - uint4 i = sample_c(input.t) * float4(255.5f, 255.5f, 255.5f, 255.5f); + uint4 i = saturate(sample_c(input.t)) * float4(255.5f, 255.5f, 255.5f, 255.5f); return ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | 
((i.w & 0x80u) << 8); } @@ -158,7 +162,10 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); - output.c = float4(value.rgb, value.a / (128.25f / 255.0f)); +#if 1 //TODO: saturate this and the above+below ones too? I was suggested to not to + value.a = saturate(value.a); +#endif + output.c = float4(value.rgb, value.a * (255.0f / 128.25f)); return output; } @@ -166,15 +173,37 @@ PS_OUTPUT ps_rta_decorrection(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); +#if 1 + value.a = saturate(value.a); +#endif output.c = float4(value.rgb, value.a * (128.25f / 255.0f)); return output; } +float fmod_mask_positive(float a, float b) +{ + // Don't wrap if the number if a multiple, to emulate bit mask operators + if (fmod(a, b) == 0.f && a != 0.f) + { + return b; + } + return fmod(fmod(a, b) + b, b); +} +float3 fmod_mask_positive(float3 a, float b) +{ + return float3(fmod_mask_positive(a.x, b), fmod_mask_positive(a.y, b), fmod_mask_positive(a.z, b)); +} + PS_OUTPUT ps_colclip_init(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); + value.rgb = saturate(value.rgb); // Clamp to [0,1] range given we might have upgraded the "Color" texture to float/HDR, to avoid overflow +#if PS_HDR + output.c = float4(value.rgb * 255.f / 65535.f, value.a); +#else output.c = float4(round(value.rgb * 255) / 65535, value.a); +#endif return output; } @@ -182,7 +211,12 @@ PS_OUTPUT ps_colclip_resolve(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); +#if PS_HDR + //TODO: add handling for negative values here (fmod_positive())? Or is this pre-wrapped to be positive only? 
+ output.c = float4(fmod_mask_positive(value.rgb * 65535.f, 255.f) / 255.f, value.a); +#else output.c = float4(float3(uint3(value.rgb * 65535.5) & 255) / 255, value.a); +#endif return output; } @@ -215,25 +249,25 @@ PS_OUTPUT ps_convert_float16_rgb5a1(PS_INPUT input) float rgba8_to_depth32(float4 val) { - uint4 c = uint4(val * 255.5f); + uint4 c = uint4(saturate(val) * 255.5f); return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); } float rgba8_to_depth24(float4 val) { - uint3 c = uint3(val.rgb * 255.5f); + uint3 c = uint3(saturate(val.rgb) * 255.5f); return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); } float rgba8_to_depth16(float4 val) { - uint2 c = uint2(val.rg * 255.5f); + uint2 c = uint2(saturate(val.rg) * 255.5f); return float(c.r | (c.g << 8)) * exp2(-32.0f); } float rgb5a1_to_depth16(float4 val) { - uint4 c = uint4(val * 255.5f); + uint4 c = uint4(saturate(val) * 255.5f); return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); } diff --git a/bin/resources/shaders/dx11/imgui.fx b/bin/resources/shaders/dx11/imgui.fx index bde3fc16da..66762b2530 100644 --- a/bin/resources/shaders/dx11/imgui.fx +++ b/bin/resources/shaders/dx11/imgui.fx @@ -1,9 +1,14 @@ // SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ -cbuffer vertexBuffer : register(b0) +#ifndef PS_HDR +#define PS_HDR 0 +#endif + +cbuffer cb0 : register(b0) { float4x4 ProjectionMatrix; + float4 Brightness; }; struct VS_INPUT @@ -35,5 +40,10 @@ Texture2D texture0 : register(t0); float4 ps_main(PS_INPUT input) : SV_Target { float4 out_col = input.col * texture0.Sample(sampler0, input.uv); +#if PS_HDR + out_col.rgb = pow(out_col.rgb, 2.2); + //out_col.a = pow(out_col.a, 1.0 / 2.2); // Approximation to match gamma space blends //TODO: bad? 
+#endif + out_col.rgb *= Brightness.x; // Always 1 in SDR return out_col; } diff --git a/bin/resources/shaders/dx11/present.fx b/bin/resources/shaders/dx11/present.fx index da1b82d0df..3ac4f21be0 100644 --- a/bin/resources/shaders/dx11/present.fx +++ b/bin/resources/shaders/dx11/present.fx @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ +#ifndef PS_HDR +#define PS_HDR 0 +#endif + struct VS_INPUT { float4 p : POSITION; @@ -25,7 +29,7 @@ cbuffer cb0 : register(b0) float2 u_rcp_target_resolution; // 1 / u_target_resolution float2 u_source_resolution; float2 u_rcp_source_resolution; // 1 / u_source_resolution - float u_time; + float2 u_time_and_brightness; // time, user brightness scale (HDR) }; Texture2D Texture; @@ -59,13 +63,32 @@ VS_OUTPUT vs_main(VS_INPUT input) return output; } +PS_OUTPUT EncodeOutput(PS_OUTPUT output) +{ + // If necessary we could convert to any color space here, + // assuming we are starting Rec.709 with gamma 2.2. +#if !PS_HDR && 1 //TODO: Test only! + // Convert to sRGB encoding (useful to test SDR in HDR as Windows interprets SDR content as sRGB) + float3 color_in_excess = output.c.rgb - saturate(output.c.rgb); + output.c.rgb = saturate(output.c.rgb); + output.c.rgb = pow(output.c.rgb, 2.2); + output.c.rgb = (output.c.rgb < 0.0031308) ? 
(output.c.rgb * 12.92) : (1.055 * pow(output.c.rgb, 0.41666) - 0.055); + output.c.rgb += color_in_excess; +#endif + + // Apply the user brightness level + output.c.rgb *= u_time_and_brightness.y; + + return output; +} + PS_OUTPUT ps_copy(PS_INPUT input) { PS_OUTPUT output; output.c = sample_c(input.t); - return output; + return EncodeOutput(output); } float4 ps_crt(PS_INPUT input, int i) @@ -100,7 +123,7 @@ PS_OUTPUT ps_filter_scanlines(PS_INPUT input) output.c = ps_scanlines(input, p.y % 2); - return output; + return EncodeOutput(output); } PS_OUTPUT ps_filter_diagonal(PS_INPUT input) @@ -111,7 +134,7 @@ PS_OUTPUT ps_filter_diagonal(PS_INPUT input) output.c = ps_crt(input, (p.x + (p.y % 3)) % 3); - return output; + return EncodeOutput(output); } PS_OUTPUT ps_filter_triangular(PS_INPUT input) @@ -123,7 +146,7 @@ PS_OUTPUT ps_filter_triangular(PS_INPUT input) // output.c = ps_crt(input, ((p.x + (p.y & 1) * 3) >> 1) % 3); output.c = ps_crt(input, ((p.x + ((p.y >> 1) & 1) * 3) >> 1) % 3); - return output; + return EncodeOutput(output); } static const float PI = 3.14159265359f; @@ -136,7 +159,7 @@ PS_OUTPUT ps_filter_complex(PS_INPUT input) // triangular output.c = (0.9 - 0.4 * cos(2 * PI * input.t.y * texdim.y)) * sample_c(float2(input.t.x, (floor(input.t.y * texdim.y) + 0.5) / texdim.y)); - return output; + return EncodeOutput(output); } //Lottes CRT @@ -155,7 +178,10 @@ PS_OUTPUT ps_filter_complex(PS_INPUT input) // triangular float ToLinear1(float c) { - return c <= 0.04045 ? c / 12.92 : pow((abs(c) + 0.055) / 1.055, 2.4); +#if PS_HDR // Already linear + return c; +#endif + return pow(abs(c), 2.2) * sign(c); } float3 ToLinear(float3 c) @@ -163,14 +189,17 @@ float3 ToLinear(float3 c) return float3(ToLinear1(c.r), ToLinear1(c.g), ToLinear1(c.b)); } -float ToSrgb1(float c) +float ToGamma1(float c) { - return c < 0.0031308 ? 
c * 12.92 : 1.055 * pow(abs(c), 0.41666) - 0.055; +#if PS_HDR // Already linear + return c; +#endif + return pow(abs(c), 1.0 / 2.2) * sign(c); } -float3 ToSrgb(float3 c) +float3 ToGamma(float3 c) { - return float3(ToSrgb1(c.r), ToSrgb1(c.g), ToSrgb1(c.b)); + return float3(ToGamma1(c.r), ToGamma1(c.g), ToGamma1(c.b)); } float3 Fetch(float2 pos, float2 off) @@ -423,7 +452,7 @@ float4 LottesCRTPass(float4 fragcoord) #if UseShadowMask color.rgb *= Mask(fragcoord.xy); #endif - color.rgb = ToSrgb(color.rgb); + color.rgb = ToGamma(color.rgb); color.a = 1.0; return color; @@ -434,7 +463,7 @@ PS_OUTPUT ps_filter_lottes(PS_INPUT input) PS_OUTPUT output; output.c = LottesCRTPass(input.p); - return output; + return EncodeOutput(output); } PS_OUTPUT ps_4x_rgss(PS_INPUT input) @@ -453,13 +482,182 @@ PS_OUTPUT ps_4x_rgss(PS_INPUT input) color += sample_c(input.t + float2(-l, s) * dxy).rgb; output.c = float4(color * 0.25,1); - return output; + return EncodeOutput(output); +} + +float Luminance(float3 color) +{ + float3 Rec709_Luminance = float3(0.2126, 0.7152, 0.0722); + return dot(color, Rec709_Luminance); +} + +// Non filtered gamma corrected sample (nearest neighbor) +float4 QuickSample(float2 uv, float gamma) +{ + float4 color = Texture.Sample(TextureSampler, uv); //TODO: bilinear or nearest??? +#if !PS_HDR // HDR is already linear + color.rgb = pow(color.rgb, gamma); +#endif + return color; +} +float4 QuickSampleByPixel(float2 xy, float gamma) +{ + return QuickSample(xy * u_rcp_source_resolution, gamma); +} + +// By Sam Belliveau and Filippo Tarpini. Public Domain license. +// Effectively a more accurate sharp bilinear filter when upscaling, +// that also works as a mathematically perfect downscale filter. +// https://entropymine.com/imageworsener/pixelmixing/ +// https://github.com/obsproject/obs-studio/pull/1715 +// https://legacy.imagemagick.org/Usage/filter/ +float4 AreaSampling(float2 uv, float gamma) +{ + // Determine the sizes of the source and target images. 
+ float2 source_size = u_source_resolution; //TODO: "size" for these? + float2 target_size = u_target_resolution; + float2 inverted_target_size = u_rcp_target_resolution; + + // Compute the top-left and bottom-right corners of the target pixel box. + float2 t_beg = floor(uv * target_size); + float2 t_end = t_beg + float2(1.0, 1.0); + + // Convert the target pixel box to source pixel box. + float2 beg = t_beg * inverted_target_size * source_size; + float2 end = t_end * inverted_target_size * source_size; + + // Compute the top-left and bottom-right corners of the pixel box. + float2 f_beg = floor(beg); + float2 f_end = floor(end); + + // Compute how much of the start and end pixels are covered horizontally & vertically. + float area_w = 1.0 - frac(beg.x); + float area_n = 1.0 - frac(beg.y); + float area_e = frac(end.x); + float area_s = frac(end.y); + + // Compute the areas of the corner pixels in the pixel box. + float area_nw = area_n * area_w; + float area_ne = area_n * area_e; + float area_sw = area_s * area_w; + float area_se = area_s * area_e; + + // Initialize the color accumulator. + float4 avg_color = float4(0.0, 0.0, 0.0, 0.0); + float avg_luminance = 0.0; + float4 temp_color; + + float luminance_gamma = 2.2; + float luminance_inv_gamma = 1.0 / luminance_gamma; + + // Prevents rounding errors due to the coordinates flooring above + const float2 offset = float2(0.5, 0.5); + + // Accumulate corner pixels. 
+ temp_color = QuickSampleByPixel(float2(f_beg.x, f_beg.y) + offset, gamma); + avg_color += area_nw * temp_color; + avg_luminance += area_nw * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + temp_color = QuickSampleByPixel(float2(f_end.x, f_beg.y) + offset, gamma); + avg_color += area_ne * temp_color; + avg_luminance += area_ne * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + temp_color = QuickSampleByPixel(float2(f_beg.x, f_end.y) + offset, gamma); + avg_color += area_sw * temp_color; + avg_luminance += area_sw * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + temp_color = QuickSampleByPixel(float2(f_end.x, f_end.y) + offset, gamma); + avg_color += area_se * temp_color; + avg_luminance += area_se * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + + // Determine the size of the pixel box. + int x_range = int(f_end.x - f_beg.x - 0.5); + int y_range = int(f_end.y - f_beg.y - 0.5); + + // Workaround to compile the shader with DX11/12. + // If this isn't done, it will complain that the loop could have too many iterations. + // This number should be enough to guarantee downscaling from very high to very small resolutions. + // Note that this number might be referenced in the UI. + const int max_iterations = 16; + + // Fix up the average calculations in case we reached the upper limit + x_range = min(x_range, max_iterations); + y_range = min(y_range, max_iterations); + + // Accumulate top and bottom edge pixels. 
+ for (int ix = 0; ix < max_iterations; ++ix) + { + if (ix < x_range) + { + float x = f_beg.x + 1.0 + float(ix); + temp_color = QuickSampleByPixel(float2(x, f_beg.y) + offset, gamma); + avg_color += area_n * temp_color; + avg_luminance += area_n * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + temp_color = QuickSampleByPixel(float2(x, f_end.y) + offset, gamma); + avg_color += area_s * temp_color; + avg_luminance += area_s * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + } + } + + // Accumulate left and right edge pixels and all the pixels in between. + for (int iy = 0; iy < max_iterations; ++iy) + { + if (iy < y_range) + { + float y = f_beg.y + 1.0 + float(iy); + + temp_color = QuickSampleByPixel(float2(f_beg.x, y) + offset, gamma); + avg_color += area_w * temp_color; + avg_luminance += area_w * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + temp_color = QuickSampleByPixel(float2(f_end.x, y) + offset, gamma); + avg_color += area_e * temp_color; + avg_luminance += area_e * pow(Luminance(temp_color.rgb), luminance_inv_gamma); + + for (int ix = 0; ix < max_iterations; ++ix) + { + if (ix < x_range) + { + float x = f_beg.x + 1.0 + float(ix); + temp_color = QuickSampleByPixel(float2(x, y) + offset, gamma); + avg_color += temp_color; + avg_luminance += pow(Luminance(temp_color.rgb), luminance_inv_gamma); + } + } + } + } + + // Compute the area of the pixel box that was sampled. + float area_corners = area_nw + area_ne + area_sw + area_se; + float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e); + float area_center = float(x_range) * float(y_range); + + float4 nrm_color = avg_color / (area_corners + area_edges + area_center); + float target_nrm_color_luminance = avg_luminance / (area_corners + area_edges + area_center); + +#if PS_HDR + // Restore the averaged "gamma" space luminance, for better gamma correction. 
+ // This retains the best feature of gamma correct sampling (no hue shifts), + // while also maintaining the perceptual "brightness" level of blending two colors with an alpha + // (in linear space a 0.5 alpha won't produce a color that has a perceptual brightness in the middle point of the two source colors). + float nrm_color_luminance = Luminance(nrm_color.rgb); + if (nrm_color_luminance != 0.0) + { + nrm_color.rgb *= pow(target_nrm_color_luminance, luminance_gamma) / nrm_color_luminance; + } +#endif + + // Return the normalized average color. + return nrm_color; } PS_OUTPUT ps_automagical_supersampling(PS_INPUT input) { PS_OUTPUT output; +#if 1 //TODO: ... + float source_gamma = 2.2f; +#if PS_HDR + source_gamma = 1.f; +#endif + output.c = AreaSampling(input.t, source_gamma); +#else float2 ratio = (u_source_size / u_target_size) * 0.5; float2 steps = floor(ratio); float3 col = sample_c(input.t).rgb; @@ -474,7 +672,9 @@ PS_OUTPUT ps_automagical_supersampling(PS_INPUT input) div++; } } - + output.c = float4(col / div, 1); - return output; +#endif + + return EncodeOutput(output); } diff --git a/bin/resources/shaders/dx11/shadeboost.fx b/bin/resources/shaders/dx11/shadeboost.fx deleted file mode 100644 index e9911d203d..0000000000 --- a/bin/resources/shaders/dx11/shadeboost.fx +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team -// SPDX-License-Identifier: GPL-3.0+ - -Texture2D Texture; -SamplerState Sampler; - -cbuffer cb0 -{ - float4 params; -}; - -/* -** Contrast, saturation, brightness -** Code of this function is from TGM's shader pack -** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 -*/ - -// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% -float4 ContrastSaturationBrightness(float4 color) // Ported to HLSL -{ - float brt = params.x; - float con = params.y; - float sat = params.z; - - // Increase or decrease these values to adjust r, g and b color channels separately - const float AvgLumR = 0.5; - const float 
AvgLumG = 0.5; - const float AvgLumB = 0.5; - - const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721); - - float3 AvgLumin = float3(AvgLumR, AvgLumG, AvgLumB); - float3 brtColor = color.rgb * brt; - float3 intensity = dot(brtColor, LumCoeff); - float3 satColor = lerp(intensity, brtColor, sat); - float3 conColor = lerp(AvgLumin, satColor, con); - - color.rgb = conColor; - return color; -} - -struct PS_INPUT -{ - float4 p : SV_Position; - float2 t : TEXCOORD0; -}; - -float4 ps_main(PS_INPUT input) : SV_Target0 -{ - float4 c = Texture.Sample(Sampler, input.t); - return ContrastSaturationBrightness(c); -} diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 2bd548ec60..86b7af8121 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -77,12 +77,30 @@ #define PS_NO_COLOR 0 #define PS_NO_COLOR1 0 #define PS_DATE 0 +#define PS_HDR 0 +#endif + +//TODO: clear +#if 0 +#undef PS_HDR +#define PS_HDR 0 +#endif +#if 0 +#undef PS_HDR +#define PS_HDR 1 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) #define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1)) #define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED) +//TODO: test this 0.1? 
+#if PS_HDR +#define RT_COLOR_OFFSET 0.0f +#else +#define RT_COLOR_OFFSET 0.1f +#endif + struct VS_INPUT { float2 st : TEXCOORD0; @@ -172,10 +190,32 @@ cbuffer cb1 float RcpScaleFactor; }; +float fmod_positive(float a, float b) +{ + return fmod(fmod(a, b) + b, b); +} +float3 fmod_positive(float3 a, float b) +{ + return float3(fmod_positive(a.x, b), fmod_positive(a.y, b), fmod_positive(a.z, b)); +} +float fmod_mask_positive(float a, float b) +{ + // Don't wrap if the number is a multiple, to emulate bit mask operators + if (fmod(a, b) == 0.f && a != 0.f) + { + return b; + } + return fmod(fmod(a, b) + b, b); +} +float3 fmod_mask_positive(float3 a, float b) +{ + return float3(fmod_mask_positive(a.x, b), fmod_mask_positive(a.y, b), fmod_mask_positive(a.z, b)); +} + float4 sample_c(float2 uv, float uv_w) { #if PS_TEX_IS_FB == 1 - return RtTexture.Load(int3(int2(uv * WH.zw), 0)); + return RtTexture.Load(int3(int2(uv * WH.zw), 0)); //TODO: why are these not rounded? #elif PS_REGION_RECT == 1 return Texture.Load(int3(int2(uv), 0)); #else @@ -187,7 +227,9 @@ float4 sample_c(float2 uv, float uv_w) // I'm manually adjusting coordinates to the centre of texels here, // though the centre is just paranoia, the top left corner works fine. // As of 2018 this issue is still present. 
+#if PS_HDR && 0 uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw; +#endif } #if !PS_ADJS && !PS_ADJT uv *= STScale; @@ -225,14 +267,30 @@ float4 sample_c(float2 uv, float uv_w) #endif } +#if PS_HDR +float4 sample_p(float u) +#else float4 sample_p(uint u) +#endif { + if (PS_HDR) //TODO: add linear sampling and improved LUT extrapolation + { + float2 size; + Palette.GetDimensions(size.x, size.y); + // Y is always 1 texel large + float excess = max(u - (size.x - 1.f), 0.f) / size.x; + return Palette.Load(int3(int(round(u)), 0, 0)) * (excess + 1.f); + } return Palette.Load(int3(int(u), 0, 0)); } float4 sample_p_norm(float u) { - return sample_p(uint(u * 255.5f)); +#if PS_HDR + return sample_p(u * 255.0f); +#else + return sample_p(u * 255.5f); //TODO: why is this denormalized to 255.5? +#endif } float4 clamp_wrap_uv(float4 uv) @@ -255,7 +313,7 @@ float4 clamp_wrap_uv(float4 uv) } else if(PS_WMS == 3) { - #if PS_FST == 0 +#if PS_FST == 0 // wrap negative uv coords to avoid an off by one error that shifted // textures. Fixes Xenosaga's hair issue. uv = frac(uv); @@ -326,7 +384,12 @@ float4x4 sample_4c(float4 uv, float uv_w) return c; } -uint4 sample_4_index(float4 uv, float uv_w) +#if PS_HDR +float4 +#else +uint4 +#endif + sample_4_index(float4 uv, float uv_w) { float4 c; @@ -336,26 +399,38 @@ uint4 sample_4_index(float4 uv, float uv_w) c.w = sample_c(uv.zw, uv_w).a; // Denormalize value - uint4 i; - + float4 i; + +#if !PS_HDR if (PS_RTA_SRC_CORRECTION) { - i = uint4(round(c * 128.25f)); // Denormalize value + i = round(c * 128.25f); // Denormalize value //TODO: why rounding? } else { - i = uint4(c * 255.5f); // Denormalize value + i = c * 255.5f; // Denormalize value } +#else + i = c * (PS_RTA_SRC_CORRECTION ? 127.5f : 255.f); //TODO: is 127.5 the right halved value? 
+#endif if (PS_PAL_FMT == 1) { // 4HL - return i & 0xFu; +#if !PS_HDR + return (uint4)i & 0xFu; +#else + return fmod(i, 16.f); // Note: negative handling is a bit random here but it should be fine +#endif } else if (PS_PAL_FMT == 2) { // 4HH - return i >> 4u; +#if !PS_HDR + return (uint4)i >> 4u; +#else + return max(i - pow(2.f, 4.f), min(i, 0.f)) / pow(2.f, 4.f); //TODO: this formula doesn't match the SDR one... As it doesn't mask out the first 4 bits first +#endif } else { @@ -364,7 +439,11 @@ uint4 sample_4_index(float4 uv, float uv_w) } } +#if PS_HDR +float4x4 sample_4p(float4 u) +#else float4x4 sample_4p(uint4 u) +#endif { float4x4 c; @@ -510,7 +589,7 @@ float4 sample_depth(float2 st, float2 pos) } else if (PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE) { - t = trunc(sample_4p(uint4(t.aaaa))[0] * 255.0f + 0.05f); + t = trunc(sample_4p(uint4(t.aaaa))[0] * 255.0f + 0.05f); //TODO: ... (depth) } return t; @@ -594,7 +673,7 @@ float4 fetch_gXbY(int2 xy) } else { - int4 rt = (int4)(fetch_raw_color(xy) * 255.0); + int4 rt = (int4)(fetch_raw_color(xy) * 255.0); //TODO: add float support? int green = (rt.g >> ChannelShuffle.w) & ChannelShuffle.z; int blue = (rt.b << ChannelShuffle.y) & ChannelShuffle.x; return (float4)(green | blue); @@ -637,9 +716,16 @@ float4 sample_color(float2 st, float uv_w) uv = clamp_wrap_uv(uv); #if PS_PAL_FMT != 0 - c = sample_4p(sample_4_index(uv, uv_w)); + c = sample_4p(sample_4_index(uv, uv_w)); #else - c = sample_4c(uv, uv_w); + c = sample_4c(uv, uv_w); +#endif +#if 0 + c[0].a = saturate(c[0].a); + c[1].a = saturate(c[1].a); + c[2].a = saturate(c[2].a); + c[3].a = saturate(c[3].a); + c[0] = saturate(c[0]); #endif } @@ -652,7 +738,7 @@ float4 sample_color(float2 st, float uv_w) } else if(PS_AEM_FMT == FMT_16) { - c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(int3(c[i].rgb * 255.0f) & 0xF8) ? TA.x : 0; + c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(int3(c[i].rgb * 255.0f) & 0xF8) ? TA.x : 0; //TODO: remove trunc/mask? 
Nah } } @@ -666,15 +752,23 @@ float4 sample_color(float2 st, float uv_w) } if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION) - t.a = t.a * (128.5f / 255.0f); - + t.a = t.a * (128.5f / 255.0f); //TODO: why 128.5 normalization? + + if (PS_HDR) //TODO: test... good change??? Fixes raised blacks? + { + return t * 255.0f; + } return trunc(t * 255.0f + 0.05f); } float4 tfx(float4 T, float4 C) { float4 C_out; - float4 FxT = trunc((C * T) / 128.0f); + float4 FxT = (C * T) / 128.0f; + if (PS_HDR == 0) + { + FxT = trunc(FxT); + } #if (PS_TFX == 0) C_out = FxT; @@ -696,7 +790,10 @@ float4 tfx(float4 T, float4 C) #if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3) // Clamp only when it is useful - C_out = min(C_out, 255.0f); + if (PS_HDR == 0) + { + C_out = min(C_out, 255.0f); + } #endif return C_out; @@ -733,7 +830,12 @@ float4 fog(float4 c, float f) { if(PS_FOG) { - c.rgb = trunc(lerp(FogColor, c.rgb, f)); + c.rgb = lerp(FogColor, c.rgb, f); + if (PS_HDR == 0) + { + c.rgb = trunc(c.rgb); + } + //TODO: try luminance preserving fog? } return c; @@ -769,7 +871,7 @@ float4 ps_color(PS_INPUT input) if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)) { - uint4 denorm_c_before = uint4(T); + uint4 denorm_c_before = uint4(T); //TODO: allow float? or int if (PS_PROCESS_BA & SHUFFLE_READ) { T.r = float((denorm_c_before.b << 3) & 0xF8u); @@ -799,15 +901,32 @@ void ps_fbmask(inout float4 C, float2 pos_xy) { if (PS_FBMASK) { - float multi = PS_COLCLIP_HW ? 65535.0f : 255.0f; - float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * multi + 0.1f); - C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask)); +#if 1 //TODO: test! + if (PS_HDR && !PS_COLCLIP_HW) + { + float4 RT = RtTexture.Load(int3(pos_xy, 0)) * 255.0f; + bool4 hi_bit = (FbMask & 0x80) != 0; + RT = hi_bit ? RT : min(RT, 255.0f); + C = hi_bit ? min(C, 255.0f) : C; + uint4 RTi = (uint4)(RT + 0.5f); //TODO: make int? 
Or better, use fmod! Also what if we have numbers higher than the mask peak countbits? + uint4 Ci = (uint4)(C + 0.5f); + uint4 mask = ((int4)FbMask << 24) >> 24; // Sign extend mask + C = (float4)((Ci & ~mask) | (RTi & mask)); + } + else +#endif + { + float multi = PS_COLCLIP_HW ? 65535.0f : 255.0f; + float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * multi + RT_COLOR_OFFSET); + C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask)); + } } } void ps_dither(inout float3 C, float As, float2 pos_xy) { - if (PS_DITHER > 0 && PS_DITHER < 3) + // Dithering shouldn't be particularly needed in HDR (though it won't necessarily hurt, especially on top of low quality source textures) + if (PS_DITHER > 0 && PS_DITHER < 3 && PS_HDR == 0) { int2 fpos; @@ -825,7 +944,7 @@ void ps_dither(inout float3 C, float As, float2 pos_xy) float Alpha = PS_BLEND_C == 2 ? Af : As; value *= Alpha > 0.0f ? min(1.0f / Alpha, 1.0f) : 1.0f; } - + if (PS_ROUND_INV) C -= value; else @@ -835,25 +954,42 @@ void ps_dither(inout float3 C, float As, float2 pos_xy) void ps_color_clamp_wrap(inout float3 C) { + int mask = 0; + // When dithering the bottom 3 bits become meaningless and cause lines in the picture // so we need to limit the color depth on dithered items + // 0xF8 is 248, which is "11111000" (lower 3 bits are masked off, as in, the lower values) if (SW_BLEND || (PS_DITHER > 0 && PS_DITHER < 3) || PS_FBMASK) { - if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV) - C += 7.0f; // Need to round up, not down since the shader will invert + if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV) //TODO: HDR!??? + C += float(0xFF - 0xF8); // Need to round up, not down since the shader will invert // Standard Clamp if (PS_COLCLIP == 0 && PS_COLCLIP_HW == 0) - C = clamp(C, (float3)0.0f, (float3)255.0f); + { + if (PS_HDR == 0) + C = clamp(C, (float3)0.0f, (float3)255.0f); + else // Without this, bloom in some games can go negative and make the scene darker //TODO: why... 
This removes BT.2020 colors, is there a better alternative? + C = max(C, (float3)0.0f); + } // In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania if (PS_DST_FMT == FMT_16 && PS_DITHER != 3 && (PS_BLEND_MIX == 0 || PS_DITHER)) - C = (float3)((int3)C & (int3)0xF8); + mask = 0xF8; else if (PS_COLCLIP == 1 || PS_COLCLIP_HW == 1) - C = (float3)((int3)C & (int3)0xFF); + mask = 0xFF; } else if (PS_DST_FMT == FMT_16 && PS_DITHER != 3 && PS_BLEND_MIX == 0 && PS_BLEND_HW == 0) - C = (float3)((int3)C & (int3)0xF8); + mask = 0xF8; + + if (mask != 0) + { +#if PS_HDR // Avoid quantization to 8bit in HDR + C = mask == 0xFF ? fmod_mask_positive(C, 255.f) : (C - fmod_positive(C, 8)); // 248 → 255 - 7 = 248 +#else + C = (float3)((int3)C & (int3)mask); +#endif + } } void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) @@ -880,7 +1016,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) if (PS_SHUFFLE && SW_BLEND_NEEDS_RT) { - uint4 denorm_rt = uint4(RT); + uint4 denorm_rt = uint4(RT); //TODO: allow float? or int if (PS_PROCESS_BA & SHUFFLE_WRITE) { RT.r = float((denorm_rt.b << 3) & 0xF8u); @@ -897,9 +1033,17 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) } } - float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f; + float Ad = PS_RTA_CORRECTION ? ((RT.a * 128.0f + RT_COLOR_OFFSET) / 128.0f) : ((RT.a * 255.0f + RT_COLOR_OFFSET) / 128.0f); + //if (PS_HDR == 0) //TODO: do we even care about not truncating alpha? Probably not (port to VK in case) + { + Ad = trunc(Ad); + } float color_multi = PS_COLCLIP_HW ? 65535.0f : 255.0f; - float3 Cd = trunc(RT.rgb * color_multi + 0.1f); + float3 Cd = RT.rgb * color_multi + RT_COLOR_OFFSET; + if (PS_HDR == 0) + { + Cd = trunc(Cd); + } float3 Cs = Color.rgb; float3 A = (PS_BLEND_A == 0) ? Cs : ((PS_BLEND_A == 1) ? 
Cd : (float3)0.0f); @@ -915,19 +1059,29 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) if (PS_BLEND_A == PS_BLEND_B) Color.rgb = D; - // In blend_mix, HW adds on some alpha factor * dst. - // Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation. - // Instead, apply an offset to convert HW's round to a floor. - // Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision. - // But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399 - // Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause. - // 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256). else if (PS_BLEND_MIX == 2) - Color.rgb = ((A - B) * C_clamped + D) + (124.0f / 256.0f); + Color.rgb = (A - B) * C_clamped + D; else if (PS_BLEND_MIX == 1) - Color.rgb = ((A - B) * C_clamped + D) - (124.0f / 256.0f); + Color.rgb = (A - B) * C_clamped + D; else - Color.rgb = trunc(((A - B) * C) + D); + Color.rgb = (A - B) * C + D; + + if (PS_BLEND_A != PS_BLEND_B && PS_HDR == 0) + { + // In blend_mix, HW adds on some alpha factor * dst. + // Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation. + // Instead, apply an offset to convert HW's round to a floor. + // Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision. + // But they don't. 
Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399 + // Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause. + // 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256). + if (PS_BLEND_MIX == 2) + Color.rgb += 124.0f / 256.0f; + else if (PS_BLEND_MIX == 1) + Color.rgb -= 124.0f / 256.0f; + else + Color.rgb = trunc(Color.rgb); + } if (PS_BLEND_HW == 1) { @@ -938,7 +1092,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) // we pick the lowest overflow from all colors because it's the safest, // we divide by 255 the color because we don't know Cd value, // changed alpha should only be done for hw blend. - float3 alpha_compensate = max((float3)1.0f, Color.rgb / (float3)255.0f); + float3 alpha_compensate = max((float3)1.0f, Color.rgb / (float3)255.0f); //TODO: now Color.rgb could be > 1? As_rgba.rgb -= alpha_compensate; } else if (PS_BLEND_HW == 2) @@ -956,7 +1110,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) As_rgba.rgb = (float3)C_clamped; // Cs*(Alpha + 1) might overflow, if it does then adjust alpha value // that is sent on second output to compensate. - float3 overflow_check = (Color.rgb - (float3)255.0f) / 255.0f; + float3 overflow_check = (Color.rgb - (float3)255.0f) / 255.0f; //TODO: now Color.rgb could be > 1? float3 alpha_compensate = max((float3)0.0f, overflow_check); As_rgba.rgb -= alpha_compensate; } @@ -1039,7 +1193,11 @@ PS_OUTPUT ps_main(PS_INPUT input) float4 alpha_blend = (float4)0.0f; if (SW_AD_TO_HW) { - float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + 0.1f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f); + float4 RT = PS_RTA_CORRECTION ? 
(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + RT_COLOR_OFFSET) : (RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + RT_COLOR_OFFSET); + if (PS_HDR == 0) + { + RT = trunc(RT); + } alpha_blend = (float4)(RT.a / 128.0f); } else @@ -1088,7 +1246,7 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (!PS_SHUFFLE_SAME && !PS_READ16_SRC && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)) { - uint4 denorm_c_after = uint4(C); + uint4 denorm_c_after = uint4(C); //TODO: allow int? if (PS_PROCESS_BA & SHUFFLE_READ) { C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u)); @@ -1105,7 +1263,7 @@ PS_OUTPUT ps_main(PS_INPUT input) // Special case for 32bit input and 16bit output, shuffle used by The Godfather if (PS_SHUFFLE_SAME) { - uint4 denorm_c = uint4(C); + uint4 denorm_c = uint4(C); //TODO: allow int? if (PS_PROCESS_BA & SHUFFLE_READ) C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); @@ -1115,7 +1273,7 @@ PS_OUTPUT ps_main(PS_INPUT input) // Copy of a 16bit source in to this target else if (PS_READ16_SRC) { - uint4 denorm_c = uint4(C); + uint4 denorm_c = uint4(C); //TODO: allow int? uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)); if (denorm_c.a & 0x80u) @@ -1156,10 +1314,16 @@ PS_OUTPUT ps_main(PS_INPUT input) #endif #if !PS_NO_COLOR - output.c0.a = PS_RTA_CORRECTION ? C.a / 128.0f : C.a / 255.0f; - output.c0.rgb = PS_COLCLIP_HW ? float3(C.rgb / 65535.0f) : C.rgb / 255.0f; + output.c0.a = PS_RTA_CORRECTION ? (C.a / 128.0f) : (C.a / 255.0f); + output.c0.rgb = PS_COLCLIP_HW ? (C.rgb / 65535.0f) : (C.rgb / 255.0f); + output.c0.a = clamp(output.c0.a, 0.f, 2.f); //TODO: ... +#if PS_COLCLIP_HW && 0 //TODO: test... It's not recursive... + // Pre wrap negative values as we can't store negative colors in colclip hw textures! + output.c0.rgb = output.c0.rgb < 0.f ? 
(1.f + output.c0.rgb) : output.c0.rgb; +#endif #if !PS_NO_COLOR1 output.c1 = alpha_blend; + output.c1.a = clamp(output.c1.a, 0.f, 2.f); //TODO: ... #endif #endif // !PS_NO_COLOR diff --git a/bin/resources/shaders/opengl/colorcorrect.glsl b/bin/resources/shaders/opengl/colorcorrect.glsl new file mode 100644 index 0000000000..7b9a09f2ff --- /dev/null +++ b/bin/resources/shaders/opengl/colorcorrect.glsl @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +//#version 420 // Keep it for editor detection + +// SMPTE 170M - BT.601 (NTSC-M) -> BT.709 +mat3 from_NTSCM = transpose(mat3( + 0.939497225737661, 0.0502268452914346, 0.0102759289709032, + 0.0177558637510127, 0.965824605885027, 0.0164195303639603, + -0.00162163209967010, -0.00437400622653655, 1.00599563832621)); + +// ARIB TR-B9 (9300K+27MPCD with chromatic adaptation) (NTSC-J) -> BT.709 +mat3 from_NTSCJ = transpose(mat3( + 0.823613036967492, -0.0943227111084757, 0.00799341532931119, + 0.0289258355537324, 1.02310733489462, 0.00243547111576797, + -0.00569501554980891, 0.0161828357559315, 1.22328453915712)); + +// EBU - BT.470BG/BT.601 (PAL) -> BT.709 +mat3 from_PAL = transpose(mat3( + 1.04408168421813, -0.0440816842181253, 0.000000000000000, + 0.000000000000000, 1.00000000000000, 0.000000000000000, + 0.000000000000000, 0.0118044782106489, 0.988195521789351)); + +/* +** Contrast, saturation, brightness +** Code of this function is from TGM's shader pack +** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 +** TGM's author comment about the license (included in the previous link) +** "do with it, what you want! its total free! 
+** (but would be nice, if you say that you used my shaders :wink: ) but not necessary" +*/ + +#ifdef FRAGMENT_SHADER + +uniform vec4 correction; +uniform vec4 adjustment; + +in vec4 PSin_p; +in vec2 PSin_t; +in vec4 PSin_c; + +layout(binding = 0) uniform sampler2D TextureSampler; + +layout(location = 0) out vec4 SV_Target0; + +// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% +vec4 ContrastSaturationBrightness(vec4 color) +{ + float brt = adjustment.x; + float con = adjustment.y; + float sat = adjustment.z; + +#if 1 // For linear space in/out + vec3 AvgLumin = vec3(0.18); // Mid gray +#else + // Increase or decrease these values to adjust r, g and b color channels separately + const float AvgLumR = 0.5; + const float AvgLumG = 0.5; + const float AvgLumB = 0.5; + vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB); +#endif + + const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721); + + vec3 brtColor = color.rgb * brt; + float dot_intensity = dot(brtColor, LumCoeff); + vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity); + vec3 satColor = mix(intensity, brtColor, sat); + vec3 conColor = mix(AvgLumin, satColor, con); + + color.rgb = conColor; + return color; +} + +void ps_main() +{ + vec4 c = texture(TextureSampler, PSin_t); + + // Linearize + c.rgb = pow(abs(c.rgb), vec3(correction.x)) * sign(c.rgb); + + // Convert to BT.709 from the user specified game color space + if (correction.y == 1.f) + { + c.rgb = c.rgb * from_NTSCM; + } + else if (correction.y == 2.f) + { + c.rgb = c.rgb * from_NTSCJ; + } + else if (correction.y == 3.f) + { + c.rgb = c.rgb * from_PAL; + } + + c = ContrastSaturationBrightness(c); + + // Convert to Gamma 2.2 (not sRGB) + c.rgb = pow(max(c.rgb, vec3(0.0)), vec3(1.0 / 2.2)); + + SV_Target0 = c; +} + + +#endif diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl index 27320a82ce..d348cda7a8 100644 --- a/bin/resources/shaders/opengl/convert.glsl +++ b/bin/resources/shaders/opengl/convert.glsl 
@@ -52,6 +52,23 @@ vec4 sample_c() return texture(TextureSampler, PSin_t); } +float saturate(float c) +{ + return clamp(c, 0.0, 1.0); +} +vec2 saturate(vec2 c) +{ + return clamp(c, 0.0, 1.0); +} +vec3 saturate(vec3 c) +{ + return clamp(c, 0.0, 1.0); +} +vec4 saturate(vec4 c) +{ + return clamp(c, 0.0, 1.0); +} + #ifdef ps_copy void ps_copy() { @@ -88,7 +105,7 @@ void ps_downsample_copy() // Need to be careful with precision here, it can break games like Spider-Man 3 and Dogs Life void ps_convert_rgba8_16bits() { - highp uvec4 i = uvec4(sample_c() * vec4(255.5f, 255.5f, 255.5f, 255.5f)); + highp uvec4 i = uvec4(saturate(sample_c()) * vec4(255.5f, 255.5f, 255.5f, 255.5f)); SV_Target1 = ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8); } @@ -122,25 +139,25 @@ void ps_convert_float16_rgb5a1() float rgba8_to_depth32(vec4 unorm) { - uvec4 c = uvec4(unorm * vec4(255.5f)); + uvec4 c = uvec4(saturate(unorm) * vec4(255.5f)); return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); } float rgba8_to_depth24(vec4 unorm) { - uvec3 c = uvec3(unorm.rgb * vec3(255.5f)); + uvec3 c = uvec3(saturate(unorm.rgb) * vec3(255.5f)); return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); } float rgba8_to_depth16(vec4 unorm) { - uvec2 c = uvec2(unorm.rg * vec2(255.5f)); + uvec2 c = uvec2(saturate(unorm.rg) * vec2(255.5f)); return float(c.r | (c.g << 8)) * exp2(-32.0f); } float rgb5a1_to_depth16(vec4 unorm) { - uvec4 c = uvec4(unorm * vec4(255.5f)); + uvec4 c = uvec4(saturate(unorm) * vec4(255.5f)); return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); } @@ -352,6 +369,7 @@ void ps_rta_decorrection() void ps_colclip_init() { vec4 value = sample_c(); + value.rgb = saturate(value.rgb); // Clamp to [0,1] range given we might have upgraded the "Color" texture to float/HDR, to avoid overflow SV_Target0 = vec4(round(value.rgb * 255.0f) / 65535.0f, value.a); } #endif 
diff --git a/bin/resources/shaders/opengl/present.glsl b/bin/resources/shaders/opengl/present.glsl index 4ba45cd967..20cda3dd12 100644 --- a/bin/resources/shaders/opengl/present.glsl +++ b/bin/resources/shaders/opengl/present.glsl @@ -42,7 +42,7 @@ uniform vec2 u_target_resolution; uniform vec2 u_rcp_target_resolution; // 1 / u_target_resolution uniform vec2 u_source_resolution; uniform vec2 u_rcp_source_resolution; // 1 / u_source_resolution -uniform float u_time; +uniform vec2 u_time_and_brightness; // time, user brightness scale (HDR) in vec4 PSin_p; in vec2 PSin_t; @@ -152,7 +152,7 @@ void ps_filter_complex() float ToLinear1(float c) { - return c <= 0.04045 ? c / 12.92 : pow((c + 0.055) / 1.055, 2.4); + return pow(abs(c), 2.2) * sign(c); } vec3 ToLinear(vec3 c) @@ -160,14 +160,14 @@ vec3 ToLinear(vec3 c) return vec3(ToLinear1(c.r), ToLinear1(c.g), ToLinear1(c.b)); } -float ToSrgb1(float c) +float ToGamma1(float c) { - return c < 0.0031308 ? c * 12.92 : 1.055 * pow(c, 0.41666) - 0.055; + return pow(abs(c), 1.0 / 2.2) * sign(c); } -vec3 ToSrgb(vec3 c) +vec3 ToGamma(vec3 c) { - return vec3(ToSrgb1(c.r), ToSrgb1(c.g), ToSrgb1(c.b)); + return vec3(ToGamma1(c.r), ToGamma1(c.g), ToGamma1(c.b)); } vec3 Fetch(vec2 pos, vec2 off) @@ -421,7 +421,7 @@ vec4 LottesCRTPass() #if UseShadowMask color.rgb *= Mask(fragcoord.xy); #endif - color.rgb = ToSrgb(color.rgb); + color.rgb = ToGamma(color.rgb); return color; } diff --git a/bin/resources/shaders/opengl/shadeboost.glsl b/bin/resources/shaders/opengl/shadeboost.glsl deleted file mode 100644 index 19abb3cbe5..0000000000 --- a/bin/resources/shaders/opengl/shadeboost.glsl +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team -// SPDX-License-Identifier: GPL-3.0+ - -//#version 420 // Keep it for editor detection - -/* -** Contrast, saturation, brightness -** Code of this function is from TGM's shader pack -** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 -** TGM's author comment about 
the license (included in the previous link) -** "do with it, what you want! its total free! -** (but would be nice, if you say that you used my shaders :wink: ) but not necessary" -*/ - -#ifdef FRAGMENT_SHADER - -uniform vec4 params; - -in vec4 PSin_p; -in vec2 PSin_t; -in vec4 PSin_c; - -layout(binding = 0) uniform sampler2D TextureSampler; - -layout(location = 0) out vec4 SV_Target0; - -// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% -vec4 ContrastSaturationBrightness(vec4 color) -{ - float brt = params.x; - float con = params.y; - float sat = params.z; - - // Increase or decrease these values to adjust r, g and b color channels separately - const float AvgLumR = 0.5; - const float AvgLumG = 0.5; - const float AvgLumB = 0.5; - - const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721); - - vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB); - vec3 brtColor = color.rgb * brt; - float dot_intensity = dot(brtColor, LumCoeff); - vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity); - vec3 satColor = mix(intensity, brtColor, sat); - vec3 conColor = mix(AvgLumin, satColor, con); - - color.rgb = conColor; - return color; -} - - -void ps_main() -{ - vec4 c = texture(TextureSampler, PSin_t); - SV_Target0 = ContrastSaturationBrightness(c); -} - - -#endif diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index f290e8d5e8..f9336afa58 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -753,7 +753,7 @@ void ps_color_clamp_wrap(inout vec3 C) #if SW_BLEND || (PS_DITHER > 0 && PS_DITHER < 3) || PS_FBMASK #if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV - C += 7.0f; // Need to round up, not down since the shader will invert + C += 7.0f; // Need to round up, not down since the shader will invert (0xFF - 0xF8) #endif // Correct the Color value based on the output format @@ -1126,7 +1126,7 @@ void ps_main() SV_Target0.a = C.a / 255.0f; #endif #if PS_COLCLIP_HW == 1 
- SV_Target0.rgb = vec3(C.rgb / 65535.0f); + SV_Target0.rgb = C.rgb / 65535.0f; #else SV_Target0.rgb = C.rgb / 255.0f; #endif diff --git a/bin/resources/shaders/vulkan/colorcorrect.glsl b/bin/resources/shaders/vulkan/colorcorrect.glsl new file mode 100644 index 0000000000..b53a2744e9 --- /dev/null +++ b/bin/resources/shaders/vulkan/colorcorrect.glsl @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +//#version 420 // Keep it for editor detection + +#ifndef PS_HDR_INPUT +#define PS_HDR_INPUT 0 +#endif +#ifndef PS_HDR_OUTPUT +#define PS_HDR_OUTPUT 0 +#endif + +// SMPTE 170M - BT.601 (NTSC-M) -> BT.709 +mat3 from_NTSCM = transpose(mat3( + 0.939497225737661, 0.0502268452914346, 0.0102759289709032, + 0.0177558637510127, 0.965824605885027, 0.0164195303639603, + -0.00162163209967010, -0.00437400622653655, 1.00599563832621)); + +// ARIB TR-B9 (9300K+27MPCD with chromatic adaptation) (NTSC-J) -> BT.709 +mat3 from_NTSCJ = transpose(mat3( + 0.823613036967492, -0.0943227111084757, 0.00799341532931119, + 0.0289258355537324, 1.02310733489462, 0.00243547111576797, + -0.00569501554980891, 0.0161828357559315, 1.22328453915712)); + +// EBU - BT.470BG/BT.601 (PAL) -> BT.709 +mat3 from_PAL = transpose(mat3( + 1.04408168421813, -0.0440816842181253, 0.000000000000000, + 0.000000000000000, 1.00000000000000, 0.000000000000000, + 0.000000000000000, 0.0118044782106489, 0.988195521789351)); + +// Applies exponential ("Photographic") luminance/luma compression. +float RangeCompress(float X) +{ + // Branches are for static parameters optimizations + // This does 1 - e^(-X). We expect X to be between 0 and 1. + return 1.f - exp(-X); +} + +// Refurbished DICE HDR tonemapper (per channel or luminance). 
+float LuminanceCompress( + float InValue, + float OutMaxValue, + float ShoulderStart /*= 0.f*/) +{ + const float compressableValue = InValue - ShoulderStart; + const float compressedRange = OutMaxValue - ShoulderStart; + const float possibleOutValue = ShoulderStart + compressedRange * RangeCompress(compressableValue / compressedRange); + return (InValue <= ShoulderStart) ? InValue : possibleOutValue; +} + +#ifdef VERTEX_SHADER + +layout(location = 0) in vec4 a_pos; +layout(location = 1) in vec2 a_tex; + +layout(location = 0) out vec2 v_tex; + +void main() +{ + gl_Position = vec4(a_pos.x, -a_pos.y, a_pos.z, a_pos.w); + v_tex = a_tex; +} + +#endif + +/* +** Contrast, saturation, brightness +** Code of this function is from TGM's shader pack +** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 +** TGM's author comment about the license (included in the previous link) +** "do with it, what you want! its total free! +** (but would be nice, if you say that you used my shaders :wink: ) but not necessary" +*/ + +#ifdef FRAGMENT_SHADER + +layout(push_constant) uniform cb0 +{ + vec4 correction; + vec4 adjustment; +}; + +layout(set = 0, binding = 0) uniform sampler2D samp0; +layout(location = 0) in vec2 v_tex; +layout(location = 0) out vec4 o_col0; + +// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% +vec4 ContrastSaturationBrightness(vec4 color) +{ + float brt = adjustment.x; + float con = adjustment.y; + float sat = adjustment.z; + +#if 1 // For linear space in/out + vec3 AvgLumin = vec3(0.18); // Mid gray +#else + // Increase or decrease these values to adjust r, g and b color channels separately + const float AvgLumR = 0.5; + const float AvgLumG = 0.5; + const float AvgLumB = 0.5; + vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB); +#endif + + const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721); // Rec.709 + + vec3 brtColor = color.rgb * brt; + float dot_intensity = dot(brtColor, LumCoeff); + vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity); 
+ vec3 satColor = mix(intensity, brtColor, sat); + vec3 conColor = mix(AvgLumin, satColor, con); + + color.rgb = conColor; + return color; +} + +void main() +{ + vec4 c = texture(samp0, v_tex); + +#if PS_HDR_INPUT + // Tonemap in gamma space (this specific formula looks better with it) and by channel, to best retain the original color hues. + // Theoretically tonemapping should be done in the color space of the output display (e.g. BT.2020 in HDR and BT.709 in SDR), + // because displays usually clip individual rgb values to the peak brightness value of HDR, + // but for simplicity, we do it in the raw game color space. + + // In HDR, we only compress the range above SDR (1), in SDR, we compress the top 20% range, to avoid clipping and retain HDR detail. + float shoulderStart = 1.f; +#if !PS_HDR_OUTPUT + shoulderStart = 0.8f; +#endif + + float peakWhite = correction.w; + + c.r = LuminanceCompress(c.r, peakWhite, shoulderStart); + c.g = LuminanceCompress(c.g, peakWhite, shoulderStart); + c.b = LuminanceCompress(c.b, peakWhite, shoulderStart); +#endif + + // Linearize + c.rgb = pow(abs(c.rgb), vec3(correction.x)) * sign(c.rgb); + +#if PS_HDR_OUTPUT && 0 // Print HDR colors + if (any(c.rgb > 1.0)) + { + c.rgb = vec3(1, 0, 1); + } +#endif + + // Convert to BT.709 from the user specified game color space + if (correction.y == 1.f) + { + c.rgb = c.rgb * from_NTSCM; + } + else if (correction.y == 2.f) + { + c.rgb = c.rgb * from_NTSCJ; + } + else if (correction.y == 3.f) + { + c.rgb = c.rgb * from_PAL; + } + + float HDRPaperWhite = correction.z; + +#if PS_HDR_OUTPUT && 0 // AutoHDR + c.rgb = PumboAutoHDR(c.rgb, 750.0, HDRPaperWhite * 80.0); +#endif + + c = ContrastSaturationBrightness(c); + +#if 0 // Moved to presentation + c.rgb *= HDRPaperWhite; +#endif + +#if PS_HDR_OUTPUT + // Leave as linear, for scRGB HDR +#else + // Convert to Gamma 2.2 (not sRGB) + c.rgb = pow(max(c.rgb, vec3(0.0)), vec3(1.0 / 2.2)); +#endif + + o_col0 = c; +} + + +#endif diff --git 
a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl index 9a3b551dd0..1022d718b7 100644 --- a/bin/resources/shaders/vulkan/convert.glsl +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -18,6 +18,10 @@ void main() #ifdef FRAGMENT_SHADER +#ifndef PS_HDR +#define PS_HDR 0 +#endif + layout(location = 0) in vec2 v_tex; #if defined(ps_convert_rgba8_16bits) || defined(ps_convert_float32_32bits) @@ -45,6 +49,23 @@ vec4 sample_c(vec2 uv) return texture(samp0, uv); } +float saturate(float c) +{ + return clamp(c, 0.0, 1.0); +} +vec2 saturate(vec2 c) +{ + return clamp(c, 0.0, 1.0); +} +vec3 saturate(vec3 c) +{ + return clamp(c, 0.0, 1.0); +} +vec4 saturate(vec4 c) +{ + return clamp(c, 0.0, 1.0); +} + #ifdef ps_copy void ps_copy() { @@ -93,7 +114,7 @@ void ps_filter_transparency() // Need to be careful with precision here, it can break games like Spider-Man 3 and Dogs Life void ps_convert_rgba8_16bits() { - uvec4 i = uvec4(sample_c(v_tex) * vec4(255.5f, 255.5f, 255.5f, 255.5f)); + uvec4 i = uvec4(saturate(sample_c(v_tex)) * vec4(255.5f, 255.5f, 255.5f, 255.5f)); o_col0 = ((i.x & 0x00F8u) >> 3) | ((i.y & 0x00F8u) << 2) | ((i.z & 0x00f8u) << 7) | ((i.w & 0x80u) << 8); } @@ -136,6 +157,7 @@ void ps_datm0_rta_correction() void ps_rta_correction() { vec4 value = sample_c(v_tex); + value.rgb = saturate(value.rgb); o_col0 = vec4(value.rgb, value.a / (128.25f / 255.0f)); } #endif @@ -144,15 +166,35 @@ void ps_rta_correction() void ps_rta_decorrection() { vec4 value = sample_c(v_tex); + value.rgb = saturate(value.rgb); o_col0 = vec4(value.rgb, value.a * (128.25f / 255.0f)); } #endif +float fmod_mask_positive(float a, float b) +{ + // Don't wrap if the number is a non-zero exact multiple of b (return b instead of 0), to emulate bit mask operators + if (mod(a, b) == 0.f && a != 0.f) + { + return b; + } + return mod(mod(a, b) + b, b); +} +vec3 fmod_mask_positive(vec3 a, float b) +{ + return vec3(fmod_mask_positive(a.x, b), fmod_mask_positive(a.y, b), fmod_mask_positive(a.z, b)); +} + #ifdef 
ps_colclip_init void ps_colclip_init() { vec4 value = sample_c(v_tex); + value.rgb = saturate(value.rgb); // Clamp to [0,1] range given we might have upgraded the "Color" texture to float/HDR, to avoid overflow +#if PS_HDR + o_col0 = vec4(value.rgb * 255.f / 65535.f, value.a); +#else o_col0 = vec4(roundEven(value.rgb * 255.0f) / 65535.0f, value.a); +#endif } #endif @@ -160,7 +202,11 @@ void ps_colclip_init() void ps_colclip_resolve() { vec4 value = sample_c(v_tex); +#if PS_HDR + o_col0 = vec4(fmod_mask_positive(value.rgb * 65535.f, 255.f) / 255.f, value.a); +#else o_col0 = vec4(vec3(uvec3(value.rgb * 65535.5f) & 255u) / 255.0f, value.a); +#endif } #endif @@ -192,25 +238,25 @@ void ps_convert_float16_rgb5a1() float rgba8_to_depth32(vec4 unorm) { - uvec4 c = uvec4(unorm * vec4(255.5f)); + uvec4 c = uvec4(saturate(unorm) * vec4(255.5f)); return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f); } float rgba8_to_depth24(vec4 unorm) { - uvec3 c = uvec3(unorm.rgb * vec3(255.5f)); + uvec3 c = uvec3(saturate(unorm.rgb) * vec3(255.5f)); return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f); } float rgba8_to_depth16(vec4 unorm) { - uvec2 c = uvec2(unorm.rg * vec2(255.5f)); + uvec2 c = uvec2(saturate(unorm.rg) * vec2(255.5f)); return float(c.r | (c.g << 8)) * exp2(-32.0f); } float rgb5a1_to_depth16(vec4 unorm) { - uvec4 c = uvec4(unorm * vec4(255.5f)); + uvec4 c = uvec4(saturate(unorm) * vec4(255.5f)); return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f); } diff --git a/bin/resources/shaders/vulkan/imgui.glsl b/bin/resources/shaders/vulkan/imgui.glsl index f886acf64b..389ac21952 100644 --- a/bin/resources/shaders/vulkan/imgui.glsl +++ b/bin/resources/shaders/vulkan/imgui.glsl @@ -1,18 +1,20 @@ // SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ +layout(push_constant) uniform PushConstants +{ + vec2 uScale; + vec2 uTranslate; + vec2 uBrigthness; + vec2 
uPad; +}; + #ifdef VERTEX_SHADER layout(location = 0) in vec2 Position; layout(location = 1) in vec2 UV; layout(location = 2) in vec4 Color; -layout(push_constant) uniform PushConstants -{ - vec2 uScale; - vec2 uTranslate; -}; - layout(location = 0) out vec2 Frag_UV; layout(location = 1) out vec4 Frag_Color; @@ -27,6 +29,10 @@ void vs_main() #ifdef FRAGMENT_SHADER +#ifndef PS_HDR +#define PS_HDR 0 +#endif + layout(binding = 0) uniform sampler2D Texture; layout(location = 0) in vec2 Frag_UV; @@ -37,6 +43,11 @@ layout(location = 0) out vec4 Out_Color; void ps_main() { Out_Color = Frag_Color * texture(Texture, Frag_UV.st); +#if PS_HDR + Out_Color.rgb = pow(Out_Color.rgb, vec3(2.2)); + //Out_Color.a = pow(Out_Color.a, 1.0 / 2.2); // Approximation to match gamma space blends +#endif + Out_Color.rgb *= uBrigthness.x; // Always 1 in SDR } #endif diff --git a/bin/resources/shaders/vulkan/present.glsl b/bin/resources/shaders/vulkan/present.glsl index 23de574947..1498df7e92 100644 --- a/bin/resources/shaders/vulkan/present.glsl +++ b/bin/resources/shaders/vulkan/present.glsl @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team // SPDX-License-Identifier: GPL-3.0+ +#ifndef PS_HDR +#define PS_HDR 0 +#endif + #ifdef VERTEX_SHADER layout(location = 0) in vec4 a_pos; @@ -28,7 +32,7 @@ layout(push_constant) uniform cb10 vec2 u_rcp_target_resolution; // 1 / u_target_resolution vec2 u_source_resolution; vec2 u_rcp_source_resolution; // 1 / u_source_resolution - float u_time; + vec2 u_time_and_brightness; // time, user brightness scale (HDR) }; layout(location = 0) in vec2 v_tex; @@ -62,10 +66,31 @@ vec4 ps_scanlines(uint i) return sample_c(v_tex) * clamp((mask[i] + 0.5f), 0.0f, 1.0f); } +vec4 EncodeOutput(vec4 color) +{ + // If necessary we could convert to any color space here, + // assuming we are starting Rec.709 with gamma 2.2. +#if !PS_HDR && 1 //TODO: Test only! 
+ // Convert to sRGB encoding (useful to test SDR in HDR as Windows interprets SDR content as sRGB) + vec3 color_in_excess = color.rgb - clamp(color.rgb, 0.f, 1.f); + color.rgb = clamp(color.rgb, 0.f, 1.f); + color.rgb = pow(color.rgb, vec3(2.2)); + color.r = color.r < 0.0031308 ? (color.r * 12.92) : (1.055 * pow(color.r, 0.41666) - 0.055); + color.g = color.g < 0.0031308 ? (color.g * 12.92) : (1.055 * pow(color.g, 0.41666) - 0.055); + color.b = color.b < 0.0031308 ? (color.b * 12.92) : (1.055 * pow(color.b, 0.41666) - 0.055); + color.rgb += color_in_excess; +#endif + + // Apply the user brightness level + color.rgb *= u_time_and_brightness.y; + return color; +} + #ifdef ps_copy void ps_copy() { o_col0 = sample_c(v_tex); + o_col0 = EncodeOutput(o_col0); } #endif @@ -75,6 +100,7 @@ void ps_filter_scanlines() // scanlines uvec4 p = uvec4(gl_FragCoord); o_col0 = ps_scanlines(p.y % 2); + o_col0 = EncodeOutput(o_col0); } #endif @@ -83,6 +109,7 @@ void ps_filter_diagonal() // diagonal { uvec4 p = uvec4(gl_FragCoord); o_col0 = ps_crt((p.x + (p.y % 3)) % 3); + o_col0 = EncodeOutput(o_col0); } #endif @@ -93,6 +120,7 @@ void ps_filter_triangular() // triangular // output.c = ps_crt(input, ((p.x + (p.y & 1) * 3) >> 1) % 3); o_col0 = ps_crt(((p.x + ((p.y >> 1) & 1) * 3) >> 1) % 3); + o_col0 = EncodeOutput(o_col0); } #endif @@ -103,6 +131,7 @@ void ps_filter_complex() // triangular vec2 texdim = vec2(textureSize(samp0, 0)); o_col0 = (0.9 - 0.4 * cos(2 * PI * v_tex.y * texdim.y)) * sample_c(vec2(v_tex.x, (floor(v_tex.y * texdim.y) + 0.5) / texdim.y)); + o_col0 = EncodeOutput(o_col0); } #endif @@ -123,7 +152,10 @@ void ps_filter_complex() // triangular float ToLinear1(float c) { - return c <= 0.04045 ? 
c / 12.92 : pow((c + 0.055) / 1.055, 2.4); +#if PS_HDR // Already linear + return c; +#endif + return pow(abs(c), 2.2) * sign(c); } vec3 ToLinear(vec3 c) @@ -131,14 +163,17 @@ vec3 ToLinear(vec3 c) return vec3(ToLinear1(c.r), ToLinear1(c.g), ToLinear1(c.b)); } -float ToSrgb1(float c) +float ToGamma1(float c) { - return c < 0.0031308 ? c * 12.92 : 1.055 * pow(c, 0.41666) - 0.055; +#if PS_HDR // Already linear + return c; +#endif + return pow(abs(c), 1.0 / 2.2) * sign(c); } -vec3 ToSrgb(vec3 c) +vec3 ToGamma(vec3 c) { - return vec3(ToSrgb1(c.r), ToSrgb1(c.g), ToSrgb1(c.b)); + return vec3(ToGamma1(c.r), ToGamma1(c.g), ToGamma1(c.b)); } vec3 Fetch(vec2 pos, vec2 off) @@ -391,7 +426,7 @@ vec4 LottesCRTPass() #if UseShadowMask color.rgb *= Mask(fragcoord.xy); #endif - color.rgb = ToSrgb(color.rgb); + color.rgb = ToGamma(color.rgb); return color; } @@ -399,6 +434,7 @@ vec4 LottesCRTPass() void ps_filter_lottes() { o_col0 = LottesCRTPass(); + o_col0 = EncodeOutput(o_col0); } #endif @@ -418,6 +454,7 @@ void ps_4x_rgss() color += sample_c(v_tex + vec2(-l, s) * dxy).rgb; o_col0 = vec4(color * 0.25,1); + o_col0 = EncodeOutput(o_col0); } #endif @@ -440,6 +477,7 @@ void ps_automagical_supersampling() } o_col0 = vec4(col / div, 1); + o_col0 = EncodeOutput(o_col0); } #endif diff --git a/bin/resources/shaders/vulkan/shadeboost.glsl b/bin/resources/shaders/vulkan/shadeboost.glsl deleted file mode 100644 index b604fdcd8d..0000000000 --- a/bin/resources/shaders/vulkan/shadeboost.glsl +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team -// SPDX-License-Identifier: GPL-3.0+ - -//#version 420 // Keep it for editor detection - -#ifdef VERTEX_SHADER - -layout(location = 0) in vec4 a_pos; -layout(location = 1) in vec2 a_tex; - -layout(location = 0) out vec2 v_tex; - -void main() -{ - gl_Position = vec4(a_pos.x, -a_pos.y, a_pos.z, a_pos.w); - v_tex = a_tex; -} - -#endif - -/* -** Contrast, saturation, brightness -** Code of this function is from TGM's shader 
pack -** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057 -** TGM's author comment about the license (included in the previous link) -** "do with it, what you want! its total free! -** (but would be nice, if you say that you used my shaders :wink: ) but not necessary" -*/ - -#ifdef FRAGMENT_SHADER - -layout(push_constant) uniform cb0 -{ - vec4 params; -}; - -layout(set = 0, binding = 0) uniform sampler2D samp0; -layout(location = 0) in vec2 v_tex; -layout(location = 0) out vec4 o_col0; - -// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150% -vec4 ContrastSaturationBrightness(vec4 color) -{ - float brt = params.x; - float con = params.y; - float sat = params.z; - - // Increase or decrease these values to adjust r, g and b color channels separately - const float AvgLumR = 0.5; - const float AvgLumG = 0.5; - const float AvgLumB = 0.5; - - const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721); - - vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB); - vec3 brtColor = color.rgb * brt; - float dot_intensity = dot(brtColor, LumCoeff); - vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity); - vec3 satColor = mix(intensity, brtColor, sat); - vec3 conColor = mix(AvgLumin, satColor, con); - - color.rgb = conColor; - return color; -} - - -void main() -{ - vec4 c = texture(samp0, v_tex); - o_col0 = ContrastSaturationBrightness(c); -} - - -#endif diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index ba21b4c0b5..3b58cc21c8 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -294,6 +294,16 @@ void main() #define PS_ZCLAMP 0 #define PS_FEEDBACK_LOOP 0 #define PS_TEX_IS_FB 0 +#define PS_NO_COLOR 0 +#define PS_NO_COLOR1 0 +#define PS_DATE 0 +#define PS_HDR 0 +#endif + +//TODO: clear +#if 0 +#undef PS_HDR +#define PS_HDR 0 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) @@ -305,6 +315,12 @@ void main() #define NEEDS_TEX (PS_TFX != 4) +#if PS_HDR +#define 
RT_COLOR_OFFSET 0.0f +#else +#define RT_COLOR_OFFSET 0.1f +#endif + layout(std140, set = 0, binding = 1) uniform cb1 { vec3 FogColor; @@ -363,6 +379,28 @@ layout(set = 1, binding = 1) uniform texture2D Palette; layout(set = 1, binding = 3) uniform texture2D PrimMinTexture; #endif +float fmod_positive(float a, float b) +{ + return mod(mod(a, b) + b, b); +} +vec3 fmod_positive(vec3 a, float b) +{ + return vec3(fmod_positive(a.x, b), fmod_positive(a.y, b), fmod_positive(a.z, b)); +} +float fmod_mask_positive(float a, float b) +{ + // Don't wrap if the number is a non-zero exact multiple of b (return b instead of 0), to emulate bit mask operators + if (mod(a, b) == 0.f && a != 0.f) + { + return b; + } + return mod(mod(a, b) + b, b); +} +vec3 fmod_mask_positive(vec3 a, float b) +{ + return vec3(fmod_mask_positive(a.x, b), fmod_mask_positive(a.y, b), fmod_mask_positive(a.z, b)); +} + #if NEEDS_TEX vec4 sample_c(vec2 uv) @@ -409,14 +447,28 @@ vec4 sample_c(vec2 uv) #endif } +#if PS_HDR +vec4 sample_p(float idx) +#else vec4 sample_p(uint idx) +#endif { +#if PS_HDR + float sizeX = 256.f; + // X is always 256. Y is always 1. 
+ float excess = max(idx - (sizeX - 1.f), 0.f) / sizeX; + return texelFetch(Palette, ivec2(int(idx), 0), 0) * (excess + 1.f); +#endif return texelFetch(Palette, ivec2(int(idx), 0), 0); } vec4 sample_p_norm(float u) { +#if PS_HDR + return sample_p(u * 255.0f); +#else return sample_p(uint(u * 255.5f)); +#endif } vec4 clamp_wrap_uv(vec4 uv) @@ -513,7 +565,12 @@ mat4 sample_4c(vec4 uv) return c; } -uvec4 sample_4_index(vec4 uv) +#if PS_HDR +vec4 +#else +uvec4 +#endif + sample_4_index(vec4 uv) { vec4 c; @@ -524,25 +581,45 @@ uvec4 sample_4_index(vec4 uv) // Denormalize value -#if PS_RTA_SRC_CORRECTION - uvec4 i = uvec4(round(c * 128.25f)); +#if !PS_HDR + #if PS_RTA_SRC_CORRECTION + uvec4 i = uvec4(round(c * 128.25f)); + #else + uvec4 i = uvec4(c * 255.5f); + #endif #else - uvec4 i = uvec4(c * 255.5f); + #if PS_RTA_SRC_CORRECTION + vec4 i = c * 127.5f; + #else + vec4 i = c * 255.f; + #endif #endif #if PS_PAL_FMT == 1 // 4HL - return i & 0xFu; + #if !PS_HDR + return i & 0xFu; + #else + return mod(i, 16.f); // Note: negative handling is a bit random here but it should be fine + #endif #elif PS_PAL_FMT == 2 // 4HH - return i >> 4u; + #if !PS_HDR + return i >> 4u; + #else + return max(i - pow(2.f, 4.f), min(i, 0.f)) / pow(2.f, 4.f); + #endif #else // 8 return i; #endif } +#if PS_HDR +mat4 sample_4p(vec4 u) +#else mat4 sample_4p(uvec4 u) +#endif { mat4 c; @@ -843,6 +920,10 @@ vec4 sample_color(vec2 st) #if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION t.a = t.a * (128.5f / 255.0f); #endif + + #if PS_HDR + return t * 255.0f; + #endif return trunc(t * 255.0f + 0.05f); } @@ -851,7 +932,10 @@ vec4 sample_color(vec2 st) vec4 tfx(vec4 T, vec4 C) { vec4 C_out; - vec4 FxT = trunc((C * T) / 128.0f); + vec4 FxT = (C * T) / 128.0f; + #if !PS_HDR + FxT = trunc(FxT); + #endif #if (PS_TFX == 0) C_out = FxT; @@ -873,7 +957,9 @@ vec4 tfx(vec4 T, vec4 C) #if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3) // Clamp only when it is useful - C_out = min(C_out, 255.0f); + #if 
!PS_HDR + C_out = min(C_out, 255.0f); + #endif #endif return C_out; @@ -910,7 +996,10 @@ bool atst(vec4 C) vec4 fog(vec4 c, float f) { #if PS_FOG - c.rgb = trunc(mix(FogColor, c.rgb, f)); + c.rgb = mix(FogColor, c.rgb, f); + #if !PS_HDR + c.rgb = trunc(c.rgb); + #endif #endif return c; @@ -973,13 +1062,26 @@ vec4 ps_color() void ps_fbmask(inout vec4 C) { #if PS_FBMASK - - #if PS_COLCLIP_HW == 1 - vec4 RT = trunc(sample_from_rt() * 65535.0f); - #else - vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); - #endif - C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + if (PS_HDR != 0 && PS_COLCLIP_HW == 0) // GLSL has no implicit int -> bool conversion, compare explicitly + { + vec4 RT = sample_from_rt() * 255.0f; + bvec4 hi_bit = notEqual(FbMask & 0x80u, uvec4(0u)); // Per channel: is bit 7 of the mask set? + RT = mix(min(RT, 255.0f), RT, hi_bit); // mix() with a bvec selector returns the 2nd argument where true (GLSL ?: needs a scalar bool) + C = mix(C, min(C, 255.0f), hi_bit); + uvec4 RTi = uvec4(RT + 0.5f); // GLSL converts with constructors, it has no C-style casts + uvec4 Ci = uvec4(C + 0.5f); + uvec4 mask = uvec4((ivec4(FbMask) << 24) >> 24); // Sign extend mask (signed right shift is arithmetic) + C = vec4((Ci & ~mask) | (RTi & mask)); + } + else + { + #if PS_COLCLIP_HW == 1 + vec4 RT = trunc(sample_from_rt() * 65535.0f); + #else + vec4 RT = trunc(sample_from_rt() * 255.0f + RT_COLOR_OFFSET); + #endif + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + } #endif } @@ -1018,18 +1120,24 @@ void ps_dither(inout vec3 C, float As) void ps_color_clamp_wrap(inout vec3 C) { + int mask = 0; + // When dithering the bottom 3 bits become meaningless and cause lines in the picture // so we need to limit the color depth on dithered items #if SW_BLEND || (PS_DITHER > 0 && PS_DITHER < 3) || PS_FBMASK #if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV - C += 7.0f; // Need to round up, not down since the shader will invert + C += float(0xFF - 0xF8); // Need to round up, not down since the shader will invert #endif // Correct the Color value based on the output format #if PS_COLCLIP == 0 && PS_COLCLIP_HW == 0 // Standard Clamp - C = clamp(C, vec3(0.0f), vec3(255.0f)); + #if PS_HDR == 0 + C = clamp(C, vec3(0.0f), vec3(255.0f)); + #else // Without this, bloom in some games can go 
negative and make the scene darker + C = max(C, vec3(0.0f)); + #endif #endif // FIXME rouding of negative float? @@ -1040,14 +1148,23 @@ void ps_color_clamp_wrap(inout vec3 C) // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 #if PS_DST_FMT == FMT_16 && PS_DITHER != 3 && (PS_BLEND_MIX == 0 || PS_DITHER > 0) // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania - C = vec3(ivec3(C) & ivec3(0xF8)); + mask = 0xF8; #elif PS_COLCLIP == 1 || PS_COLCLIP_HW == 1 - C = vec3(ivec3(C) & ivec3(0xFF)); + mask = 0xFF; #endif #elif PS_DST_FMT == FMT_16 && PS_DITHER != 3 && PS_BLEND_MIX == 0 && PS_BLEND_HW == 0 - C = vec3(ivec3(C) & ivec3(0xF8)); + mask = 0xF8; #endif + + if (mask != 0) + { +#if PS_HDR // Avoid quantization to 8bit in HDR + C = mask == 0xFF ? fmod_mask_positive(C, 255.f) : (C - fmod_positive(C, 8)); // 248 → 255 - 7 = 248 +#else + C = vec3(ivec3(C) & ivec3(mask)); +#endif + } } void ps_blend(inout vec4 Color, inout vec4 As_rgba) @@ -1077,9 +1194,9 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) #endif #if PS_RTA_CORRECTION - float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f; + float Ad = trunc(RT.a * 128.0f + RT_COLOR_OFFSET) / 128.0f; #else - float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; + float Ad = trunc(RT.a * 255.0f + RT_COLOR_OFFSET) / 128.0f; #endif #if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED @@ -1099,9 +1216,12 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) // Let the compiler do its jobs ! #if PS_COLCLIP_HW == 1 - vec3 Cd = trunc(RT.rgb * 65535.0f); + vec3 Cd = RT.rgb * 65535.0f; #else - vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f); + vec3 Cd = RT.rgb * 255.0f + RT_COLOR_OFFSET; + #endif + #if !PS_HDR + Cd = trunc(Cd); #endif vec3 Cs = Color.rgb; @@ -1146,19 +1266,29 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) #if PS_BLEND_A == PS_BLEND_B Color.rgb = D; - // In blend_mix, HW adds on some alpha factor * dst. 
- // Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation. - // Instead, apply an offset to convert HW's round to a floor. - // Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision. - // But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399 - // Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause. - // 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256). #elif PS_BLEND_MIX == 2 - Color.rgb = ((A - B) * C_clamped + D) + (124.0f/256.0f); + Color.rgb = (A - B) * C_clamped + D; #elif PS_BLEND_MIX == 1 - Color.rgb = ((A - B) * C_clamped + D) - (124.0f/256.0f); + Color.rgb = (A - B) * C_clamped + D; #else - Color.rgb = trunc((A - B) * C + D); + Color.rgb = (A - B) * C + D; + #endif + + #if PS_BLEND_A != PS_BLEND_B && PS_HDR == 0 + // In blend_mix, HW adds on some alpha factor * dst. + // Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation. + // Instead, apply an offset to convert HW's round to a floor. + // Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision. + // But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399 + // Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause. 
+ // 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256). + #if PS_BLEND_MIX == 2 + Color.rgb += 124.0f / 256.0f; + #elif PS_BLEND_MIX == 1 + Color.rgb -= 124.0f / 256.0f; + #else + Color.rgb = trunc(Color.rgb); + #endif #endif #if PS_BLEND_HW == 1 @@ -1297,9 +1427,12 @@ void main() #if SW_AD_TO_HW #if PS_RTA_CORRECTION - vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f); + vec4 RT = sample_from_rt() * 128.0f + RT_COLOR_OFFSET; #else - vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); + vec4 RT = sample_from_rt() * 255.0f + RT_COLOR_OFFSET; + #endif + #if !PS_HDR + RT = trunc(RT); #endif vec4 alpha_blend = vec4(RT.a / 128.0f); @@ -1391,12 +1524,14 @@ void main() o_col0.a = C.a / 255.0f; #endif #if PS_COLCLIP_HW == 1 - o_col0.rgb = vec3(C.rgb / 65535.0f); + o_col0.rgb = C.rgb / 65535.0f; #else o_col0.rgb = C.rgb / 255.0f; #endif + o_col0.a = clamp(o_col0.a, 0.f, 2.f); //TODO: ... 
#if !PS_NO_COLOR1 o_col1 = alpha_blend; + o_col1.a = clamp(o_col1.a, 0.f, 2.f); #endif #if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend if (!atst_pass) diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index d291aab616..8a6f9ad67f 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -137,12 +137,19 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.shadeBoostBrightness, "EmuCore/GS", "ShadeBoost_Brightness", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.shadeBoostContrast, "EmuCore/GS", "ShadeBoost_Contrast", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.shadeBoostSaturation, "EmuCore/GS", "ShadeBoost_Saturation", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.colorCorrect, "EmuCore/GS", "ColorCorrect", false); + SettingWidgetBinder::BindWidgetToFloatSetting(sif, m_ui.colorCorrectGameGamma, "EmuCore/GS", "ColorCorrect_GameGamma", Pcsx2Config::GSOptions::DEFAULT_GAME_GAMMA); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.colorCorrectGameColorSpace, "EmuCore/GS", "ColorCorrect_GameColorSpace", (int)GSColorSpaceCorrection::Rec_709); + SettingWidgetBinder::BindWidgetToFloatSetting(sif, m_ui.hdrBrightness, "EmuCore/GS", "HDR_BrightnessNits", Pcsx2Config::GSOptions::DEFAULT_HDR_BRIGHTNESS_NITS); + SettingWidgetBinder::BindWidgetToFloatSetting(sif, m_ui.hdrPeakBrightness, "EmuCore/GS", "HDR_PeakBrightnessNits", Pcsx2Config::GSOptions::DEFAULT_HDR_PEAK_BRIGHTNESS_NITS); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.tvShader, "EmuCore/GS", "TVShader", DEFAULT_TV_SHADER_MODE); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.casMode, "EmuCore/GS", "CASMode", static_cast(GSCASMode::Disabled)); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.casSharpness, "EmuCore/GS", "CASSharpness", 
DEFAULT_CAS_SHARPNESS); connect(m_ui.shadeBoost, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onShadeBoostChanged); onShadeBoostChanged(); + connect(m_ui.colorCorrect, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onColorCorrectChanged); + onColorCorrectChanged(); connect(m_ui.osdMessagesPos, &QComboBox::currentIndexChanged, this, &GraphicsSettingsWidget::onMessagesPosChanged); connect(m_ui.osdPerformancePos, &QComboBox::currentIndexChanged, this, &GraphicsSettingsWidget::onPerformancePosChanged); onMessagesPosChanged(); @@ -158,6 +165,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* s_anisotropic_filtering_entries, s_anisotropic_filtering_values, "0"); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.dithering, "EmuCore/GS", "dithering_ps2", 2); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.mipmapping, "EmuCore/GS", "hw_mipmap", true); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hdr, "EmuCore/GS", "hdr", false); //TODO: expose to SW renderer too? 
Or split the HDR textures vs HDR output SettingWidgetBinder::BindWidgetToIntSetting( sif, m_ui.blending, "EmuCore/GS", "accurate_blending_unit", static_cast(AccBlendLevel::Basic)); SettingWidgetBinder::BindWidgetToIntSetting( @@ -167,6 +175,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.trilinearFiltering, QOverload::of(&QComboBox::currentIndexChanged), this, &GraphicsSettingsWidget::onTrilinearFilteringChanged); onTrilinearFilteringChanged(); + connect(m_ui.hdr, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onHDRChanged); + onHDRChanged(); ////////////////////////////////////////////////////////////////////////// // HW Renderer Fixes @@ -548,6 +558,9 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp( m_ui.mipmapping, tr("Mipmapping"), tr("Checked"), tr("Enables mipmapping, which some games require to render correctly. Mipmapping uses progressively lower resolution variants of textures at progressively further distances to reduce processing load and avoid visual artifacts.")); + dialog->registerWidgetHelp( + m_ui.hdr, tr("HDR"), tr("Unchecked"), tr("Forces all rendering to be in HDR without integer rounding, and HDR output. It will likely break many games. It might not work on all rendering backends.")); + dialog->registerWidgetHelp( m_ui.textureFiltering, tr("Texture Filtering"), tr("Bilinear (PS2)"), tr("Changes what filtering algorithm is used to map textures to surfaces.
" @@ -741,6 +754,14 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp(m_ui.shadeBoostSaturation, tr("Saturation"), tr("50"), tr("Adjusts saturation. 50 is normal.")); + dialog->registerWidgetHelp(m_ui.colorCorrectGameGamma, tr("Game Gamma"), tr(/*DEFAULT_GAME_GAMMA*/ "2.35"), tr("This will interpret the game as having this specific gamma, and convert it to your display gamma (meant to be 2.2).\n2.35 is the average CRT TV gamma.")); + + dialog->registerWidgetHelp(m_ui.colorCorrectGameColorSpace, tr("Game Color Space"), tr("Rec.709/sRGB"), tr("This will interpret the game as being developed on (or for) a specific color space (each region had its own), and convert it to your display color space (Rec.709/sRGB).\nIt's not known what standard each game targeted, if any.")); + + dialog->registerWidgetHelp(m_ui.hdrBrightness, tr("HDR Brightness"), tr(/*DEFAULT_HDR_BRIGHTNESS_NITS*/ "203"), tr("Adjusts the brightness of the HDR output (in nits). 203 nits is standard.")); + + dialog->registerWidgetHelp(m_ui.hdrPeakBrightness, tr("HDR Peak Brightness"), tr(/*DEFAULT_HDR_PEAK_BRIGHTNESS_NITS*/ "1000"), tr("Adjusts the peak brightness of the HDR output (in nits). 
It should match your display peak brightness.")); + dialog->registerWidgetHelp(m_ui.tvShader, tr("TV Shader"), tr("None (Default)"), tr("Applies a shader which replicates the visual effects of different styles of television set.")); } @@ -953,6 +974,20 @@ void GraphicsSettingsWidget::onShadeBoostChanged() m_ui.shadeBoostSaturation->setEnabled(enabled); } +void GraphicsSettingsWidget::onColorCorrectChanged() +{ + const bool enabled = m_dialog->getEffectiveBoolValue("EmuCore/GS", "ColorCorrect", false); + m_ui.colorCorrectGameGamma->setEnabled(enabled); + m_ui.colorCorrectGameColorSpace->setEnabled(enabled); +} + +void GraphicsSettingsWidget::onHDRChanged() +{ + const bool enabled = m_dialog->getEffectiveBoolValue("EmuCore/GS", "hdr", false); + m_ui.hdrBrightness->setEnabled(enabled); + m_ui.hdrPeakBrightness->setEnabled(enabled); +} + void GraphicsSettingsWidget::onMessagesPosChanged() { const bool enabled = m_ui.osdMessagesPos->currentIndex() != (m_dialog->isPerGameSettings() ? 1 : 0); @@ -1143,6 +1178,11 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.tabs->setTabEnabled(5, is_hardware); m_ui.tabs->setTabVisible(5, is_hardware); + // HDR (SW rendering supports it on post processing only, but for now they don't have separate GUI settings) + m_ui.hdr->setEnabled(!is_software && type != GSRendererType::OGL && type != GSRendererType::Metal); + m_ui.hdrBrightness->setEnabled(m_ui.hdr->isEnabled() && m_ui.hdr->isChecked()); + m_ui.hdrPeakBrightness->setEnabled(m_ui.hdr->isEnabled() && m_ui.hdr->isChecked()); + // move back to the renderer if we're on one of the now-hidden tabs if (is_software && (prev_tab == 1 || (prev_tab >= 2 && prev_tab <= 5))) m_ui.tabs->setCurrentIndex(2); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.h b/pcsx2-qt/Settings/GraphicsSettingsWidget.h index e559bcf0fe..5310becae1 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.h +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.h @@ -37,6 +37,8 @@ private Q_SLOTS: void 
onTextureDumpChanged(); void onTextureReplacementChanged(); void onShadeBoostChanged(); + void onColorCorrectChanged(); + void onHDRChanged(); void onMessagesPosChanged(); void onPerformancePosChanged(); void onCaptureContainerChanged(); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index 5ffc26604d..299acf97b6 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -635,6 +635,13 @@ + + + + HDR + + + @@ -1402,6 +1409,154 @@ + + + + Color Correction + + + + + + Correct Colors + + + + + + + + + Qt::Orientation::Horizontal + + + + 40 + 20 + + + + + + + + Game Gamma: + + + + + + + 2.0 + + + 3.0 + + + 0.01 + + + + + + + Game Color Space: + + + + + + + + Rec.709/sRGB + + + + + NTCS-M (Rec.601) + + + + + NTCS-J + + + + + PAL + + + + + + + + + + HDR + + + + + + + + + Qt::Orientation::Horizontal + + + + 40 + 20 + + + + + + + + HDR Brightness: + + + + + + + nits + + + 80 + + + 500 + + + + + + + HDR Peak Brightness: + + + + + + + nits + + + 400 + + + 10000 + + + + + + + + @@ -1525,7 +1680,7 @@ - 1 + 0 100 diff --git a/pcsx2/Config.h b/pcsx2/Config.h index f6bd3ce2d3..1c7166c4db 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -33,6 +33,8 @@ class SettingsWrapper; enum class CDVD_SourceType : uint8_t; +#define OLD_HDR 0 + namespace Pad { enum class ControllerType : u8; @@ -452,6 +454,14 @@ enum class GSNativeScaling : u8 MaxCount }; +enum class GSColorSpaceCorrection : u8 +{ + Rec_709, // No correction (Rec.709/sRGB/scRGB) + NTSC_M, + NTSC_J, + PAL +}; + // -------------------------------------------------------------------------------------- // TraceLogsEE // -------------------------------------------------------------------------------------- @@ -683,6 +693,12 @@ struct Pcsx2Config static constexpr int DEFAULT_AUDIO_CAPTURE_BITRATE = 192; static const char* DEFAULT_CAPTURE_CONTAINER; + static constexpr float DEFAULT_GAME_GAMMA = 2.35f; // CRT average gamma + + 
static constexpr float DEFAULT_SRGB_BRIGHTNESS_NITS = 80.f; + static constexpr float DEFAULT_HDR_BRIGHTNESS_NITS = 203.f; // ITU standard + static constexpr float DEFAULT_HDR_PEAK_BRIGHTNESS_NITS = 1000.f; // Common value as of 2025 + union { u64 bitset; @@ -727,6 +743,8 @@ struct Pcsx2Config PreloadFrameWithGSData : 1, Mipmap : 1, HWMipmap : 1, + HDRRendering : 1, + HDROutput : 1, ManualUserHacks : 1, UserHacks_AlignSpriteX : 1, UserHacks_CPUFBConversion : 1, @@ -740,6 +758,7 @@ struct Pcsx2Config UserHacks_NativePaletteDraw : 1, UserHacks_EstimateTextureRegion : 1, FXAA : 1, + ColorCorrect : 1, ShadeBoost : 1, DumpGSData : 1, SaveRT : 1, @@ -818,6 +837,10 @@ struct Pcsx2Config u8 ShadeBoost_Brightness = 50; u8 ShadeBoost_Contrast = 50; u8 ShadeBoost_Saturation = 50; + float ColorCorrect_GameGamma = DEFAULT_GAME_GAMMA; + GSColorSpaceCorrection ColorCorrect_GameColorSpace = GSColorSpaceCorrection::Rec_709; + float HDR_BrightnessNits = DEFAULT_HDR_BRIGHTNESS_NITS; + float HDR_PeakBrightnessNits = DEFAULT_HDR_PEAK_BRIGHTNESS_NITS; u8 PNGCompressionLevel = 1; u16 SWExtraThreads = 2; @@ -1326,6 +1349,8 @@ struct Pcsx2Config AspectRatioType CurrentAspectRatio = AspectRatioType::RAuto4_3_3_2; // Fall back aspect ratio for games that have patches (when AspectRatioType::RAuto4_3_3_2) is active. float CurrentCustomAspectRatio = 0.f; + bool HDRRendering = false; + bool HDROutput = false; bool IsPortableMode = false; Pcsx2Config(); diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index 43f88e826d..f7b06e342d 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -108,6 +108,24 @@ static bool OpenGSDevice(GSRendererType renderer, bool clear_state_on_fail, bool GSVSyncMode vsync_mode, bool allow_present_throttle) { const RenderAPI new_api = GetAPIForRenderer(renderer); + + // These features are only supported by some renderers and on some HW (e.g. HDR displays), + // so they need a live state. 
+ EmuConfig.HDRRendering = GSConfig.HDRRendering; + EmuConfig.HDROutput = GSConfig.HDROutput; + + // Force disable HDR on unsupported (or partially supported) renderers. + if (new_api == RenderAPI::OpenGL || new_api == RenderAPI::Metal) + { + EmuConfig.HDRRendering = false; + EmuConfig.HDROutput = false; + } + // This is ignored by the SW renderer but let's turn it off for clarity. + if (!GSIsHardwareRenderer()) + { + EmuConfig.HDRRendering = false; + } + switch (new_api) { #ifdef _WIN32 diff --git a/pcsx2/GS/GSCapture.cpp b/pcsx2/GS/GSCapture.cpp index d55faf43f4..86c5ab0b2b 100644 --- a/pcsx2/GS/GSCapture.cpp +++ b/pcsx2/GS/GSCapture.cpp @@ -870,9 +870,10 @@ bool GSCapture::DeliverVideoFrame(GSTexture* stex) s_frame_encoded_cv.wait(lock, [&pf]() { return pf.state == PendingFrame::State::Unused; }); } - if (!pf.tex || pf.tex->GetWidth() != static_cast(stex->GetWidth()) || pf.tex->GetHeight() != static_cast(stex->GetHeight())) + if (!pf.tex || pf.tex->GetWidth() != static_cast(stex->GetWidth()) || pf.tex->GetHeight() != static_cast(stex->GetHeight()) || pf.tex->GetFormat() != stex->GetFormat()) { pf.tex.reset(); + pxAssert(stex->GetFormat() == GSCapture::CAPTURE_TEX_FORMAT); // For now only this format is supported pf.tex = g_gs_device->CreateDownloadTexture(stex->GetWidth(), stex->GetHeight(), stex->GetFormat()); if (!pf.tex) { diff --git a/pcsx2/GS/GSCapture.h b/pcsx2/GS/GSCapture.h index 237ed9caf3..eebc2ed76d 100644 --- a/pcsx2/GS/GSCapture.h +++ b/pcsx2/GS/GSCapture.h @@ -8,17 +8,18 @@ #include "common/SmallString.h" #include "GSVector.h" +#include "GS/Renderers/Common/GSTexture.h" namespace Threading { class ThreadHandle; } -class GSTexture; -class GSDownloadTexture; - namespace GSCapture { + // Any conversion from game buffer to a capture buffer requires 8bit per channel for simplicity (no HDR nor 10bit support) + static const GSTexture::Format CAPTURE_TEX_FORMAT = GSTexture::Format::Color; + bool BeginCapture(float fps, GSVector2i recommendedResolution, 
float aspect, std::string filename); bool DeliverVideoFrame(GSTexture* stex); void DeliverAudioPacket(const s16* frames); // AudioStream::CHUNK_SIZE diff --git a/pcsx2/GS/GSClut.cpp b/pcsx2/GS/GSClut.cpp index d518765140..14858e219c 100644 --- a/pcsx2/GS/GSClut.cpp +++ b/pcsx2/GS/GSClut.cpp @@ -443,7 +443,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) if (!dst) { // allocate texture lazily - dst = g_gs_device->CreateRenderTarget(dst_size, 1, GSTexture::Format::Color, false); + dst = g_gs_device->CreateRenderTarget(dst_size, 1, g_gs_device->GetEmuHWRTTexFormat(), false); //TODO: HDR or GSTexture::Format::Color??? Nah. Though all textures need to be HDR if HDR is enabled, due to VK and DX12 pipelines being fixed format is_4bit ? (m_gpu_clut4 = dst) : (m_gpu_clut8 = dst); } if (dst) diff --git a/pcsx2/GS/GSClut.h b/pcsx2/GS/GSClut.h index 0d27227fc3..0c8b07071d 100644 --- a/pcsx2/GS/GSClut.h +++ b/pcsx2/GS/GSClut.h @@ -48,7 +48,7 @@ class alignas(32) GSClut final : public GSAlignedClass<32> GSTexture* m_gpu_clut4 = nullptr; GSTexture* m_gpu_clut8 = nullptr; - GSTexture* m_current_gpu_clut = nullptr; + GSTexture* m_current_gpu_clut = nullptr; // The lut to be used by HW renderers typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index f717e9ca10..4bbef60971 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -41,6 +41,8 @@ const char* shaderName(ShaderConvert value) { // clang-format off case ShaderConvert::COPY: return "ps_copy"; + case ShaderConvert::COPY_EMU_LQ: return "ps_copy"; + case ShaderConvert::COPY_POSTPROCESS: return "ps_copy"; case ShaderConvert::RGBA8_TO_16_BITS: return "ps_convert_rgba8_16bits"; case ShaderConvert::DATM_1: return "ps_datm1"; case ShaderConvert::DATM_0: return "ps_datm0"; @@ -211,6 +213,15 @@ std::unique_ptr g_gs_device;
GSDevice::GSDevice() { + // Ideally the post process and emulation RTs formats would be split and we could have HDR on each of the two independently, + // though ultimately there's not much point in splitting them +#if OLD_HDR + m_emulation_hw_rt_texture_format = GSTexture::Format::Color; +#else + m_emulation_hw_rt_texture_format = EmuConfig.HDRRendering ? GSTexture::Format::ColorHDR : GSTexture::Format::Color; +#endif + m_postprocess_texture_format = EmuConfig.HDROutput ? GSTexture::Format::ColorHDR : GSTexture::Format::ColorHQ; + #ifdef PCSX2_DEVBUILD s_texture_counts.fill(0); #endif @@ -674,7 +685,7 @@ void GSDevice::ClearCurrent() void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c) { - if (ResizeRenderTarget(&m_merge, fs.x, fs.y, false, false)) + if (ResizeRenderTarget(&m_merge, fs.x, fs.y, false, false, m_postprocess_texture_format)) DoMerge(sTex, sRect, m_merge, dRect, PMODE, EXTBUF, c, GSConfig.PCRTCOffsets); m_current = m_merge; @@ -714,20 +725,20 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse switch (mode) { case 0: // Weave - ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false); + ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false, m_postprocess_texture_format); do_interlace(m_merge, m_weavebob, ShaderInterlace::WEAVE, false, offset, field); m_current = m_weavebob; break; case 1: // Bob // Field is reversed here as we are countering the bounce.
- ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false); + ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false, m_postprocess_texture_format); do_interlace(m_merge, m_weavebob, ShaderInterlace::BOB, true, yoffset * (1 - field), 0); m_current = m_weavebob; break; case 2: // Blend - ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false); + ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false, m_postprocess_texture_format); do_interlace(m_merge, m_weavebob, ShaderInterlace::WEAVE, false, offset, field); - ResizeRenderTarget(&m_blend, ds.x, ds.y, true, false); + ResizeRenderTarget(&m_blend, ds.x, ds.y, true, false, m_postprocess_texture_format); do_interlace(m_weavebob, m_blend, ShaderInterlace::BLEND, false, 0, 0); m_current = m_blend; break; @@ -736,9 +747,9 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse bufIdx &= ~1; bufIdx |= field; bufIdx &= 3; - ResizeRenderTarget(&m_mad, ds.x, ds.y * 2.0f, true, false); + ResizeRenderTarget(&m_mad, ds.x, ds.y * 2.0f, true, false, m_postprocess_texture_format); do_interlace(m_merge, m_mad, ShaderInterlace::MAD_BUFFER, false, offset, bufIdx); - ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false); + ResizeRenderTarget(&m_weavebob, ds.x, ds.y, true, false, m_postprocess_texture_format); do_interlace(m_mad, m_weavebob, ShaderInterlace::MAD_RECONSTRUCT, false, 0, bufIdx); m_current = m_weavebob; break; @@ -750,27 +761,31 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse void GSDevice::FXAA() { - // Combining FXAA+ShadeBoost can't share the same target. + // Combining FXAA+ColorCorrect can't share the same target. GSTexture*& dTex = (m_current == m_target_tmp) ? 
m_merge : m_target_tmp; - if (ResizeRenderTarget(&dTex, m_current->GetWidth(), m_current->GetHeight(), false, false)) + if (ResizeRenderTarget(&dTex, m_current->GetWidth(), m_current->GetHeight(), false, false, m_postprocess_texture_format)) { DoFXAA(m_current, dTex); m_current = dTex; } } -void GSDevice::ShadeBoost() +void GSDevice::ColorCorrect() { - if (ResizeRenderTarget(&m_target_tmp, m_current->GetWidth(), m_current->GetHeight(), false, false)) + if (ResizeRenderTarget(&m_target_tmp, m_current->GetWidth(), m_current->GetHeight(), false, false, m_postprocess_texture_format)) { + ColorCorrectConstantBuffer cb = {}; + cb.correction.x = GSConfig.ColorCorrect ? GSConfig.ColorCorrect_GameGamma : 2.2f; + cb.correction.y = GSConfig.ColorCorrect ? static_cast(GSConfig.ColorCorrect_GameColorSpace) : 0.f; + cb.correction.z = EmuConfig.HDROutput ? (GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f; + // preapply gamma on the peak brightness parameter + cb.correction.w = EmuConfig.HDROutput ? powf(GSConfig.HDR_PeakBrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS, 1.f / cb.correction.x) : 1.f; // predivide to avoid the divide (multiply) in the shader - const float params[4] = { - static_cast(GSConfig.ShadeBoost_Brightness) * (1.0f / 50.0f), - static_cast(GSConfig.ShadeBoost_Contrast) * (1.0f / 50.0f), - static_cast(GSConfig.ShadeBoost_Saturation) * (1.0f / 50.0f), - }; + cb.adjustment.x = (GSConfig.ShadeBoost ? static_cast(GSConfig.ShadeBoost_Brightness) : 50.f) * (1.0f / 50.0f); + cb.adjustment.y = (GSConfig.ShadeBoost ? static_cast(GSConfig.ShadeBoost_Contrast) : 50.f) * (1.0f / 50.0f); + cb.adjustment.z = (GSConfig.ShadeBoost ? 
static_cast(GSConfig.ShadeBoost_Saturation) : 50.f) * (1.0f / 50.0f); - DoShadeBoost(m_current, m_target_tmp, params); + DoColorCorrect(m_current, m_target_tmp, cb); m_current = m_target_tmp; } @@ -787,7 +802,7 @@ void GSDevice::Resize(int width, int height) s = m_current->GetSize() * GSVector2i(++multiplier); } - if (ResizeRenderTarget(&dTex, s.x, s.y, false, false)) + if (ResizeRenderTarget(&dTex, s.x, s.y, false, false, m_postprocess_texture_format)) { const GSVector4 sRect(0, 0, 1, 1); const GSVector4 dRect(0, 0, s.x, s.y); @@ -796,7 +811,7 @@ void GSDevice::Resize(int width, int height) } } -bool GSDevice::ResizeRenderTarget(GSTexture** t, int w, int h, bool preserve_contents, bool recycle) +bool GSDevice::ResizeRenderTarget(GSTexture** t, int w, int h, bool preserve_contents, bool recycle, GSTexture::Format default_format) { pxAssert(t); @@ -809,7 +824,7 @@ bool GSDevice::ResizeRenderTarget(GSTexture** t, int w, int h, bool preserve_con return true; } - const GSTexture::Format fmt = orig_tex ? orig_tex->GetFormat() : GSTexture::Format::Color; + const GSTexture::Format fmt = orig_tex ? 
orig_tex->GetFormat() : default_format; const bool really_preserve_contents = (preserve_contents && orig_tex); GSTexture* new_tex = FetchSurface(GSTexture::Type::RenderTarget, w, h, 1, fmt, !really_preserve_contents, true); if (!new_tex) @@ -881,7 +896,7 @@ void GSDevice::CAS(GSTexture*& tex, GSVector4i& src_rect, GSVector4& src_uv, con if (!m_cas || m_cas->GetWidth() != dst_width || m_cas->GetHeight() != dst_height) { delete m_cas; - m_cas = CreateSurface(GSTexture::Type::RWTexture, dst_width, dst_height, 1, GSTexture::Format::Color); + m_cas = CreateSurface(GSTexture::Type::RWTexture, dst_width, dst_height, 1, m_postprocess_texture_format); if (!m_cas) { Console.Error("Failed to allocate CAS RW texture."); diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index b61dca49be..d496a34b6e 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -16,7 +16,9 @@ enum class ShaderConvert { - COPY = 0, + COPY = 0, // Passthrough (generic version, it automatically redirects to the one specific for your render target format if necessary, targets the default HW emulation RTs format, "m_emulation_hw_rt_texture_format") + COPY_EMU_LQ, // Some emulation render targets might stay RGBA8 (GSTexture::Format::Color) even when HDR is enabled + COPY_POSTPROCESS, // Post processing possibly targets RGBA10 (GSTexture::Format::ColorHQ), see "m_postprocess_texture_format" RGBA8_TO_16_BITS, DATM_1, DATM_0, @@ -29,7 +31,7 @@ enum class ShaderConvert TRANSPARENCY_FILTER, FLOAT32_TO_16_BITS, FLOAT32_TO_32_BITS, - FLOAT32_TO_RGBA8, + FLOAT32_TO_RGBA8, // Doesn't necessarily write to RGBA8 render targets (see "m_emulation_hw_rt_texture_format") FLOAT32_TO_RGB8, FLOAT16_TO_RGB5A1, RGBA8_TO_FLOAT32, @@ -143,7 +145,7 @@ static inline u32 ShaderConvertWriteMask(ShaderConvert shader) enum class PresentShader { - COPY = 0, + COPY = 0, // Passthrough SCANLINE, DIAGONAL_FILTER, TRIANGULAR_FILTER, @@ -171,6 +173,7 @@ enum
ChannelFetch ChannelFetch_GXBY = 6, }; +//TODO enum class HWBlendType { SRC_ONE_DST_FACTOR = 1, // Use the dest color as blend factor, Cs is set to 1. @@ -195,7 +198,7 @@ struct alignas(16) DisplayConstantBuffer GSVector2 RcpTargetResolution; // +56,zw GSVector2 SourceResolution; // +64,xy GSVector2 RcpSourceResolution; // +72,zw - GSVector4 TimeAndPad; // seconds since GS init +76,xyzw + GSVector4 TimeAndBrightnessAndPad; // seconds since GS init and output brightness multiplier (HDR) +76,xyzw // +96 // assumes that sRect is normalized @@ -215,7 +218,11 @@ struct alignas(16) DisplayConstantBuffer } void SetTime(float time) { - TimeAndPad = GSVector4(time); + TimeAndBrightnessAndPad.x = time; + } + void SetBrightness(float brightness) + { + TimeAndBrightnessAndPad.y = brightness; } }; static_assert(sizeof(DisplayConstantBuffer) == 96, "DisplayConstantBuffer is correct size"); @@ -236,6 +243,13 @@ struct alignas(16) InterlaceConstantBuffer }; static_assert(sizeof(InterlaceConstantBuffer) == 16, "InterlaceConstantBuffer is correct size"); +struct alignas(16) ColorCorrectConstantBuffer +{ + GSVector4 correction; + GSVector4 adjustment; +}; +static_assert(sizeof(ColorCorrectConstantBuffer) == 32, "ColorCorrectConstantBuffer is correct size"); + enum HWBlendFlags { // Flags to determine blending behavior @@ -864,17 +878,21 @@ protected: bool m_allow_present_throttle = false; u64 m_last_frame_displayed_time = 0; + GSTexture::Format m_emulation_hw_rt_texture_format; // The generic color RT format HW renderers + GSTexture::Format m_postprocess_texture_format; + GSTexture* m_imgui_font = nullptr; GSTexture* m_merge = nullptr; GSTexture* m_weavebob = nullptr; GSTexture* m_blend = nullptr; GSTexture* m_mad = nullptr; - GSTexture* m_target_tmp = nullptr; - GSTexture* m_current = nullptr; GSTexture* m_cas = nullptr; GSTexture* m_colclip_rt = nullptr; ///< Temp hw colclip texture + GSTexture* m_target_tmp = nullptr; // A temporary RT for multiple purposes + GSTexture* m_current 
= nullptr; // The current RT (changes constantly) + bool AcquireWindow(bool recreate_window); virtual GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) = 0; @@ -883,7 +901,7 @@ protected: virtual void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear) = 0; virtual void DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb) = 0; virtual void DoFXAA(GSTexture* sTex, GSTexture* dTex) = 0; - virtual void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) = 0; + virtual void DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) = 0; /// Resolves CAS shader includes for the specified source. static bool GetCASShaderSource(std::string* source); @@ -920,6 +938,8 @@ public: /// Returns the maximum number of mipmap levels for a given texture size. 
static int GetMipmapLevelsForSize(int width, int height); + GSTexture::Format GetEmuHWRTTexFormat() const { return m_emulation_hw_rt_texture_format; } + __fi u64 GetPoolMemoryUsage() const { return m_pool_memory_usage; } __fi FeatureSupport Features() const { return m_features; } @@ -1036,12 +1056,14 @@ public: void Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c); void Interlace(const GSVector2i& ds, int field, int mode, float yoffset); void FXAA(); - void ShadeBoost(); + void ColorCorrect(); + // Resizes the current RT (post processing only) void Resize(int width, int height); void CAS(GSTexture*& tex, GSVector4i& src_rect, GSVector4& src_uv, const GSVector4& draw_rect, bool sharpen_only); - bool ResizeRenderTarget(GSTexture** t, int w, int h, bool preserve_contents, bool recycle); + // Creates or resizes a render target + bool ResizeRenderTarget(GSTexture** t, int w, int h, bool preserve_contents, bool recycle, GSTexture::Format default_format = GSTexture::Format::Color); void AgePool(); void PurgePool(); diff --git a/pcsx2/GS/Renderers/Common/GSRenderer.cpp b/pcsx2/GS/Renderers/Common/GSRenderer.cpp index 3b1b4aba3e..d6d5afb2ae 100644 --- a/pcsx2/GS/Renderers/Common/GSRenderer.cpp +++ b/pcsx2/GS/Renderers/Common/GSRenderer.cpp @@ -223,8 +223,9 @@ bool GSRenderer::Merge(int field) g_gs_device->Interlace(fs, field ^ field2, mode, offset); } - if (GSConfig.ShadeBoost) - g_gs_device->ShadeBoost(); + // The color correction pass needs to run at all times in HDR as it linearizes the color for scRGB HDR + if (EmuConfig.HDROutput || GSConfig.ColorCorrect || GSConfig.ShadeBoost) + g_gs_device->ColorCorrect(); if (GSConfig.FXAA) g_gs_device->FXAA(); @@ -774,7 +775,7 @@ void GSRenderer::VSync(u32 field, bool registers_written, bool idle_frame) // TODO: Maybe avoid this copy in the future? We can use swscale to fix it up on the dumping thread..
if (current->GetSize() != size) { - GSTexture* temp = g_gs_device->CreateRenderTarget(size.x, size.y, GSTexture::Format::Color, false); + GSTexture* temp = g_gs_device->CreateRenderTarget(size.x, size.y, GSCapture::CAPTURE_TEX_FORMAT, false); if (temp) { g_gs_device->StretchRect(current, temp, GSVector4(0, 0, size.x, size.y)); @@ -791,7 +792,7 @@ void GSRenderer::VSync(u32 field, bool registers_written, bool idle_frame) { // Bit janky, but unless we want to make variable frame rate files, we need to deliver *a* frame to // the video file, so just grab a blank RT. - GSTexture* temp = g_gs_device->CreateRenderTarget(size.x, size.y, GSTexture::Format::Color, true); + GSTexture* temp = g_gs_device->CreateRenderTarget(size.x, size.y, GSCapture::CAPTURE_TEX_FORMAT, true); if (temp) { GSCapture::DeliverVideoFrame(temp); @@ -1009,10 +1010,10 @@ bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool const u32 image_height = crop_borders ? draw_height : std::max(draw_height, window_height); // We're not expecting screenshots to be fast, so just allocate a download texture on demand. 
- GSTexture* rt = g_gs_device->CreateRenderTarget(draw_width, draw_height, GSTexture::Format::Color, false); + GSTexture* rt = g_gs_device->CreateRenderTarget(draw_width, draw_height, GSCapture::CAPTURE_TEX_FORMAT, false); if (rt) { - std::unique_ptr dl(g_gs_device->CreateDownloadTexture(draw_width, draw_height, GSTexture::Format::Color)); + std::unique_ptr dl(g_gs_device->CreateDownloadTexture(draw_width, draw_height, GSCapture::CAPTURE_TEX_FORMAT)); if (dl) { const GSVector4i rc(0, 0, draw_width, draw_height); diff --git a/pcsx2/GS/Renderers/Common/GSTexture.cpp b/pcsx2/GS/Renderers/Common/GSTexture.cpp index 4de4714759..af4baa8fb2 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.cpp +++ b/pcsx2/GS/Renderers/Common/GSTexture.cpp @@ -42,7 +42,7 @@ bool GSTexture::Save(const std::string& fn) case Format::UNorm8: format = GSPng::R8I_PNG; break; - case Format::Color: + case Format::Color: /*GSCapture::CAPTURE_TEX_FORMAT*/ break; default: Console.Error("Format %d not saved to image", static_cast(m_format)); @@ -79,7 +79,8 @@ const char* GSTexture::GetFormatName(Format format) "BC3", "BC7", }; - return format_names[(static_cast(format) < std::size(format_names)) ? 
static_cast(format) : 0]; + static_assert(std::size(format_names) == (static_cast(Format::Last) + 1), ""); + return format_names[static_cast(format)]; } u32 GSTexture::GetCompressedBytesPerBlock() const @@ -105,7 +106,7 @@ u32 GSTexture::GetCompressedBytesPerBlock(Format format) 16, // BC3 - 16 pixels in 128 bits 16, // BC7 - 16 pixels in 128 bits }; - + static_assert(std::size(bytes_per_block) == (static_cast(Format::Last) + 1), ""); return bytes_per_block[static_cast(format)]; } diff --git a/pcsx2/GS/Renderers/DX11/D3D.cpp b/pcsx2/GS/Renderers/DX11/D3D.cpp index faa4d251f4..d831ad05fe 100644 --- a/pcsx2/GS/Renderers/DX11/D3D.cpp +++ b/pcsx2/GS/Renderers/DX11/D3D.cpp @@ -91,6 +91,7 @@ std::vector D3D::GetAdapterInfo(IDXGIFactory5* factory) if (SUCCEEDED(hr = adapter->EnumOutputs(0, &output))) { UINT num_modes = 0; + // This will work the same regardless of the SDR/HDR format we pass in if (SUCCEEDED(hr = output->GetDisplayModeList(DXGI_FORMAT_R8G8B8A8_UNORM, 0, &num_modes, nullptr))) { std::vector dmodes(num_modes); diff --git a/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp b/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp index 3e0b7754a1..c4c5fd12d1 100644 --- a/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp +++ b/pcsx2/GS/Renderers/DX11/D3D11ShaderCache.cpp @@ -59,6 +59,7 @@ bool D3D11ShaderCache::Open(D3D_FEATURE_LEVEL feature_level, bool debug) m_feature_level = feature_level; m_debug = debug; +#if !PCSX2_DEVBUILD if (!GSConfig.DisableShaderCache) { const std::string base_filename = GetCacheBaseFileName(feature_level, debug); @@ -68,6 +69,7 @@ bool D3D11ShaderCache::Open(D3D_FEATURE_LEVEL feature_level, bool debug) if (!ReadExisting(index_filename, blob_filename)) return CreateNew(index_filename, blob_filename); } +#endif return true; } diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 8077bee3de..ae7ff47272 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -211,9 
+211,14 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) return false; } + ShaderMacro sm_emulation; + sm_emulation.AddMacro("PS_HDR", EmuConfig.HDRRendering ? "1" : "0"); + ShaderMacro sm_postprocess; + sm_postprocess.AddMacro("PS_HDR", EmuConfig.HDROutput ? "1" : "0"); + for (size_t i = 0; i < std::size(m_convert.ps); i++) { - m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, nullptr, shaderName(static_cast(i))); + m_convert.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *convert_hlsl, sm_emulation.GetPtr(), shaderName(static_cast(i))); if (!m_convert.ps[i]) return false; } @@ -229,7 +234,7 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) for (size_t i = 0; i < std::size(m_present.ps); i++) { - m_present.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, shaderName(static_cast(i))); + m_present.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, sm_postprocess.GetPtr(), shaderName(static_cast(i))); if (!m_present.ps[i]) return false; } @@ -277,7 +282,7 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) for (size_t i = 0; i < std::size(m_merge.ps); i++) { const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%d", i)); - m_merge.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str()); + m_merge.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, sm_emulation.GetPtr(), entry_point.c_str()); if (!m_merge.ps[i]) return false; } @@ -308,28 +313,32 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) shader = ReadShaderSource("shaders/dx11/interlace.fx"); if (!shader.has_value()) return false; + for (size_t i = 0; i < std::size(m_interlace.ps); i++) { const std::string entry_point(StringUtil::StdStringFromFormat("ps_main%d", i)); - m_interlace.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, entry_point.c_str()); + 
m_interlace.ps[i] = m_shader_cache.GetPixelShader(m_dev.get(), *shader, sm_emulation.GetPtr(), entry_point.c_str()); if (!m_interlace.ps[i]) return false; } - // Shade Boost + // Color Correct memset(&bd, 0, sizeof(bd)); - bd.ByteWidth = sizeof(float) * 4; + bd.ByteWidth = sizeof(ColorCorrectConstantBuffer); bd.Usage = D3D11_USAGE_DEFAULT; bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - m_dev->CreateBuffer(&bd, nullptr, m_shadeboost.cb.put()); + m_dev->CreateBuffer(&bd, nullptr, m_colorcorrect.cb.put()); - shader = ReadShaderSource("shaders/dx11/shadeboost.fx"); + shader = ReadShaderSource("shaders/dx11/colorcorrect.fx"); if (!shader.has_value()) return false; - m_shadeboost.ps = m_shader_cache.GetPixelShader(m_dev.get(), *shader, nullptr, "ps_main"); - if (!m_shadeboost.ps) + + sm_postprocess.AddMacro("PS_HDR_INPUT", EmuConfig.HDRRendering ? "1" : "0"); + sm_postprocess.AddMacro("PS_HDR_OUTPUT", EmuConfig.HDROutput ? "1" : "0"); + m_colorcorrect.ps = m_shader_cache.GetPixelShader(m_dev.get(), *shader, sm_postprocess.GetPtr(), "ps_main"); + if (!m_colorcorrect.ps) return false; // Vertex/Index Buffer @@ -529,7 +538,7 @@ void GSDevice11::Destroy() m_present = {}; m_merge = {}; m_interlace = {}; - m_shadeboost = {}; + m_colorcorrect = {}; m_date = {}; m_cas = {}; m_imgui = {}; @@ -645,11 +654,16 @@ u32 GSDevice11::GetSwapChainBufferCount() const bool GSDevice11::CreateSwapChain() { - constexpr DXGI_FORMAT swap_chain_format = DXGI_FORMAT_R8G8B8A8_UNORM; + constexpr DXGI_FORMAT swap_chain_hdr_format = DXGI_FORMAT_R16G16B16A16_FLOAT; // GSTexture::Format::ColorHDR. Automatically enables scRGB HDR if set on creation. + constexpr DXGI_FORMAT swap_chain_sdr_format = DXGI_FORMAT_R10G10B10A2_UNORM; // GSTexture::Format::ColorHQ. if (m_window_info.type != WindowInfo::Type::Win32) return false; + const DXGI_FORMAT swap_chain_format = EmuConfig.HDROutput ? 
swap_chain_hdr_format : swap_chain_sdr_format; + // For now these are expected to be identical, but it's probably not necessary + pxAssert(swap_chain_format == GSTexture11::GetDXGIFormat(m_postprocess_texture_format)); + const HWND window_hwnd = reinterpret_cast(m_window_info.window_handle); RECT client_rc{}; GetClientRect(window_hwnd, &client_rc); @@ -1187,7 +1201,7 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height D3D11_TEXTURE2D_DESC desc = {}; desc.Width = width; desc.Height = height; - desc.Format = GSTexture11::GetDXGIFormat(format); + desc.Format = GSTexture11::GetDXGIFormat(format, type); desc.MipLevels = levels; desc.ArraySize = 1; desc.SampleDesc.Count = 1; @@ -1213,6 +1227,18 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height break; } +#if OLD_HDR // Add RT to allow textures of different formats to be copied in it + if (format == GSTexture::Format::Color) + { + switch (type) + { + case GSTexture::Type::Texture: + case GSTexture::Type::RWTexture: + desc.BindFlags |= D3D11_BIND_RENDER_TARGET; + } + } +#endif + wil::com_ptr_nothrow texture; HRESULT hr = m_dev->CreateTexture2D(&desc, nullptr, texture.put()); if (FAILED(hr)) @@ -1244,6 +1270,19 @@ void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, const bool depth = (sTex->GetType() == GSTexture::Type::DepthStencil); auto pBox = depth ? 
nullptr : &box; + if (sTex->GetFormat() == dTex->GetFormat()) + { + if (EmuConfig.HDRRendering && sTex->GetFormat() == GSTexture::Format::Color) + { +#if OLD_HDR + pxAssertMsg((sTex->GetType() == GSTexture::Type::RenderTarget || sTex->GetType() == GSTexture::Type::RWTexture) == (dTex->GetType() == GSTexture::Type::RenderTarget || dTex->GetType() == GSTexture::Type::RWTexture), "CopyRect Source and Target are of different types."); +#endif + } + } + else + { + pxAssertMsg(false, "CopyRect between different formats."); + } m_ctx->CopySubresourceRegion(*(GSTexture11*)dTex, 0, destX, destY, 0, *(GSTexture11*)sTex, 0, pBox); } @@ -1295,6 +1334,8 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* const bool draw_in_depth = dTex && dTex->IsDepthStencil(); + pxAssert(sTex && dTex); + GSVector2i ds; if (dTex) { @@ -1303,6 +1344,9 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* OMSetRenderTargets(nullptr, dTex); else OMSetRenderTargets(dTex, nullptr); +#if !OLD_HDR + pxAssert(dTex->IsRenderTargetOrDepthStencil()); +#endif } else { @@ -1333,7 +1377,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* }; - IASetVertexBuffer(vertices, sizeof(vertices[0]), std::size(vertices)); + IASetVertexBuffer(vertices, sizeof(vertices[0]), std::size(vertices)); IASetInputLayout(m_convert.il.get()); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); @@ -1350,6 +1394,17 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* // DrawPrimitive(); + +#if 1 + ID3D11RenderTargetView* render_target_view = nullptr; + m_ctx->OMGetRenderTargets(1, &render_target_view, nullptr); + pxAssert(draw_in_depth || render_target_view); + if (render_target_view) + { + render_target_view->Release(); + render_target_view = nullptr; + } +#endif } void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader 
shader, float shaderTime, bool linear) @@ -1371,6 +1426,7 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* cb.SetSource(sRect, sTex->GetSize()); cb.SetTarget(dRect, ds); cb.SetTime(shaderTime); + cb.SetBrightness(EmuConfig.HDROutput ? (GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f); m_ctx->UpdateSubresource(m_present.ps_cb.get(), 0, nullptr, &cb, 0, 0); // om @@ -1626,6 +1682,7 @@ void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex) ShaderMacro sm; sm.AddMacro("FXAA_HLSL", "1"); + sm.AddMacro("PS_HDR", EmuConfig.HDROutput ? "1" : "0"); m_fxaa_ps = m_shader_cache.GetPixelShader(m_dev.get(), *shader, sm.GetPtr(), "main"); if (!m_fxaa_ps) return; @@ -1634,16 +1691,16 @@ void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex) StretchRect(sTex, sRect, dTex, dRect, m_fxaa_ps.get(), nullptr, true); } -void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) +void GSDevice11::DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) { const GSVector2i s = dTex->GetSize(); const GSVector4 sRect(0, 0, 1, 1); const GSVector4 dRect(0, 0, s.x, s.y); - m_ctx->UpdateSubresource(m_shadeboost.cb.get(), 0, nullptr, params, 0, 0); + m_ctx->UpdateSubresource(m_colorcorrect.cb.get(), 0, nullptr, &cb, 0, 0); - StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps.get(), m_shadeboost.cb.get(), false); + StretchRect(sTex, sRect, dTex, dRect, m_colorcorrect.ps.get(), m_colorcorrect.cb.get(), false); } void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb) @@ -1759,6 +1816,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb); sm.AddMacro("PS_NO_COLOR", sel.no_color); sm.AddMacro("PS_NO_COLOR1", sel.no_color1); + sm.AddMacro("PS_HDR", EmuConfig.HDRRendering); wil::com_ptr_nothrow ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, 
sm.GetPtr(), "ps_main"); i = m_ps.try_emplace(sel, std::move(ps)).first; @@ -1879,6 +1937,17 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 OMSetDepthStencilState(i->second.get(), 1); + D3D11_DEPTH_STENCIL_DESC dsd; + i->second.get()->GetDesc(&dsd); + + static bool upgrade_blends = false; //TODO: decide etc + if (upgrade_blends) + { + u8* key = (u8*)&(bsel.key); + u8* key_pad_1 = key++; + *key_pad_1 = u8(dsd.DepthEnable); + } + auto j = std::as_const(m_om_bs).find(bsel.key); if (j == m_om_bs.end()) @@ -1908,6 +1977,32 @@ void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 bd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; bd.RenderTarget[0].SrcBlendAlpha = s_d3d11_blend_factors[bsel.blend.src_factor_alpha]; bd.RenderTarget[0].DestBlendAlpha = s_d3d11_blend_factors[bsel.blend.dst_factor_alpha]; + + if (EmuConfig.HDRRendering && upgrade_blends) + { + //TODO: emulate these in pixel shaders too for the highest quality blending mode + // Turn background darkening additive alpha into pure additive alpha, this is for two reasons: + // - If the background was already beyond 1 (or below 0) the math breaks! + // - This is (mostly...) used to avoid colors clipping, but we don't need to if HDR is enabled! 
+ if (dsd.DepthEnable && bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_ADD && (bd.RenderTarget[0].SrcBlend == D3D11_BLEND_SRC_ALPHA || bd.RenderTarget[0].SrcBlend == D3D11_BLEND_ONE) && bd.RenderTarget[0].DestBlend == D3D11_BLEND_INV_SRC_ALPHA) + { + bd.RenderTarget[0].DestBlend = D3D11_BLEND_ONE; + } + else if (dsd.DepthEnable && bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_ADD && bd.RenderTarget[0].SrcBlend == D3D11_BLEND_INV_DEST_COLOR && bd.RenderTarget[0].DestBlend == D3D11_BLEND_ONE) + { + bd.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; + } + else if (dsd.DepthEnable && bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_ADD && bd.RenderTarget[0].SrcBlend == D3D11_BLEND_ONE && (bd.RenderTarget[0].DestBlend == D3D11_BLEND_INV_SRC_COLOR || bd.RenderTarget[0].DestBlend == D3D11_BLEND_INV_SRC1_COLOR)) + { + bd.RenderTarget[0].DestBlend = D3D11_BLEND_ONE; + } +#if 0 // Probably not needed??? Also what's the diff between D3D11_BLEND_INV_SRC_COLOR and D3D11_BLEND_INV_SRC1_COLOR + else if (bd.RenderTarget[0].BlendOp == D3D11_BLEND_OP_ADD && bd.RenderTarget[0].DestBlend == D3D11_BLEND_ONE && (bd.RenderTarget[0].SrcBlend == D3D11_BLEND_INV_SRC_COLOR || bd.RenderTarget[0].SrcBlend == D3D11_BLEND_INV_SRC1_COLOR)) + { + bd.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; + } +#endif + } } if (bsel.colormask.wr) @@ -1999,9 +2094,12 @@ bool GSDevice11::CreateImGuiResources() }; // clang-format on + ShaderMacro sm; + sm.AddMacro("PS_HDR", EmuConfig.HDROutput ? 
"1" : "0"); + if (!m_shader_cache.GetVertexShaderAndInputLayout(m_dev.get(), m_imgui.vs.put(), m_imgui.il.put(), layout, std::size(layout), hlsl.value(), nullptr, "vs_main") || - !(m_imgui.ps = m_shader_cache.GetPixelShader(m_dev.get(), hlsl.value(), nullptr, "ps_main"))) + !(m_imgui.ps = m_shader_cache.GetPixelShader(m_dev.get(), hlsl.value(), sm.GetPtr(), "ps_main"))) { Console.Error("D3D11: Failed to compile ImGui shaders"); return false; @@ -2025,9 +2123,9 @@ bool GSDevice11::CreateImGuiResources() D3D11_BUFFER_DESC buffer_desc = {}; buffer_desc.Usage = D3D11_USAGE_DEFAULT; - buffer_desc.ByteWidth = sizeof(float) * 4 * 4; + buffer_desc.ByteWidth = sizeof(float) * ((4 * 4) + 4); buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - hr = m_dev->CreateBuffer(&buffer_desc, nullptr, m_imgui.vs_cb.put()); + hr = m_dev->CreateBuffer(&buffer_desc, nullptr, m_imgui.cb.put()); if (FAILED(hr)) { Console.Error("D3D11: CreateImGuiResources(): CreateBlendState() failed: %08X", hr); @@ -2036,7 +2134,6 @@ bool GSDevice11::CreateImGuiResources() return true; } - void GSDevice11::RenderImGui() { ImGui::Render(); @@ -2059,15 +2156,23 @@ void GSDevice11::RenderImGui() }; // clang-format on - m_ctx->UpdateSubresource(m_imgui.vs_cb.get(), 0, nullptr, ortho_projection, 0, 0); + float cb[(4 * 4) + 4]; + std::memcpy(&cb, &ortho_projection, sizeof(ortho_projection)); + + // Imgui currently follows the same brightness as the whole HDR image (applied earlier on presentation), + // we could expose this variable to users to make it brightness + float imgui_hdr_brightness_nits = GSConfig.HDR_BrightnessNits; + cb[4 * 4] = EmuConfig.HDROutput ? 
(imgui_hdr_brightness_nits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f; + + m_ctx->UpdateSubresource(m_imgui.cb.get(), 0, nullptr, cb, 0, 0); const UINT vb_stride = sizeof(ImDrawVert); const UINT vb_offset = 0; m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset); IASetInputLayout(m_imgui.il.get()); IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get()); - PSSetShader(m_imgui.ps.get(), nullptr); + VSSetShader(m_imgui.vs.get(), m_imgui.cb.get()); + PSSetShader(m_imgui.ps.get(), m_imgui.cb.get()); OMSetBlendState(m_imgui.bs.get(), 0.0f); OMSetDepthStencilState(m_convert.dss.get(), 0); PSSetSamplerState(m_convert.ln.get()); @@ -2345,6 +2450,11 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) void GSDevice11::PSSetShaderResource(int i, GSTexture* sr) { + if (sr == nullptr) + { + m_state.ps_sr_views[i] = nullptr; + return; + } m_state.ps_sr_views[i] = *static_cast(sr); } @@ -2619,6 +2729,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) } IASetPrimitiveTopology(topology); + bool set_ps_srv_1 = false; + bool set_ps_srv_2 = false; + bool set_ps_srv_3 = false; + if (config.tex) { CommitClear(config.tex); @@ -2628,6 +2742,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) { CommitClear(config.pal); PSSetShaderResource(1, config.pal); + set_ps_srv_1 = true; } GSTexture* rt_copy = nullptr; @@ -2641,7 +2756,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) if (rt_copy) { if (config.require_one_barrier) + { PSSetShaderResource(2, rt_copy); + set_ps_srv_2 = true; + } if (config.tex && config.tex == config.rt) PSSetShaderResource(0, rt_copy); } @@ -2664,6 +2782,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) config.alpha_second_pass.ps.date = 3; SetupPS(config.ps, nullptr, config.sampler); PSSetShaderResource(3, primid_tex); + set_ps_srv_3 = true; } SetupOM(config.depth, OMBlendSelector(config.colormask, config.blend), 
config.blend.constant); @@ -2697,6 +2816,21 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) DrawIndexedPrimitive(); } +#if defined(PCSX2_DEVBUILD) // Clear them for easier debugging + if (set_ps_srv_1) + { + PSSetShaderResource(1, nullptr); + } + if (set_ps_srv_2) + { + PSSetShaderResource(2, nullptr); + } + if (set_ps_srv_3) + { + PSSetShaderResource(3, nullptr); + } +#endif + if (rt_copy) Recycle(rt_copy); if (primid_tex) diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.h b/pcsx2/GS/Renderers/DX11/GSDevice11.h index a1c1d31e7a..502373aff4 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.h +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.h @@ -105,7 +105,7 @@ private: void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear) override; void DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb) override; void DoFXAA(GSTexture* sTex, GSTexture* dTex) override; - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) override; + void DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) override; bool CreateCASShaders(); bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) override; @@ -206,7 +206,7 @@ private: { wil::com_ptr_nothrow ps; wil::com_ptr_nothrow cb; - } m_shadeboost; + } m_colorcorrect; struct { @@ -228,7 +228,7 @@ private: wil::com_ptr_nothrow vs; wil::com_ptr_nothrow ps; wil::com_ptr_nothrow bs; - wil::com_ptr_nothrow vs_cb; + wil::com_ptr_nothrow cb; } m_imgui; // Shaders... 
diff --git a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp index fa9e9b5953..9c440b9f0a 100644 --- a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp @@ -23,12 +23,19 @@ GSTexture11::GSTexture11(wil::com_ptr_nothrow texture, const D3 m_mipmap_levels = static_cast(desc.MipLevels); } -DXGI_FORMAT GSTexture11::GetDXGIFormat(Format format) +DXGI_FORMAT GSTexture11::GetDXGIFormat(Format format, Type type) { // clang-format off switch (format) { - case GSTexture::Format::Color: return DXGI_FORMAT_R8G8B8A8_UNORM; + case GSTexture::Format::Color: /*return DXGI_FORMAT_R8G8B8A8_UNORM;*/ +#if OLD_HDR //TODO: clean all up! + if (EmuConfig.HDRRendering && (type == GSTexture::Type::RenderTarget || type == GSTexture::Type::RWTexture)) + { + return DXGI_FORMAT_R16G16B16A16_FLOAT; + } +#endif + return DXGI_FORMAT_R8G8B8A8_UNORM; case GSTexture::Format::ColorHQ: return DXGI_FORMAT_R10G10B10A2_UNORM; case GSTexture::Format::ColorHDR: return DXGI_FORMAT_R16G16B16A16_FLOAT; case GSTexture::Format::ColorClip: return DXGI_FORMAT_R16G16B16A16_UNORM; @@ -67,6 +74,15 @@ bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch, int l const D3D11_BOX box = {Common::AlignDownPow2((u32)r.left, bs), Common::AlignDownPow2((u32)r.top, bs), 0U, Common::AlignUpPow2((u32)r.right, bs), Common::AlignUpPow2((u32)r.bottom, bs), 1U}; const UINT subresource = layer; // MipSlice + (ArraySlice * MipLevels). 
+ + if (EmuConfig.HDRRendering && GetFormat() == GSTexture::Format::Color) + { +#if OLD_HDR + pxAssertMsg(GetType() != GSTexture::Type::RenderTarget && GetType() != GSTexture::Type::RWTexture, "GSTexture11::Update unsupported format."); +#endif + } + // All the calls to this expect "GSTexture::Format::Color" (8bpc) at the moment + pxAssertMsg(!EmuConfig.HDRRendering || GetFormat() == GSTexture::Format::Color || GetFormat() >= GSTexture::Format::DepthStencil, "GSTexture11::Update unsupported format."); GSDevice11::GetInstance()->GetD3DContext()->UpdateSubresource(m_texture.get(), subresource, &box, data, pitch, 0); m_needs_mipmaps_generated |= (layer == 0); @@ -212,7 +228,7 @@ std::unique_ptr GSDownloadTexture11::Create(u32 width, u32 void GSDownloadTexture11::CopyFromTexture( const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) { - pxAssert(stex->GetFormat() == m_format); + pxAssert(stex->GetFormat() == GetFormat()); pxAssert(drc.width() == src.width() && drc.height() == src.height()); pxAssert(src.z <= stex->GetWidth() && src.w <= stex->GetHeight()); pxAssert(static_cast(drc.z) <= m_width && static_cast(drc.w) <= m_height); @@ -225,6 +241,14 @@ void GSDownloadTexture11::CopyFromTexture( if (IsMapped()) Unmap(); + if (EmuConfig.HDRRendering && stex->GetFormat() == GSTexture::Format::Color) + { +#if OLD_HDR + pxAssertMsg(stex->GetType() != GSTexture::Type::RenderTarget && stex->GetType() != GSTexture::Type::RWTexture, "CopyFromTexture unsupported format."); +#endif + } + pxAssertMsg(GetFormat() == stex->GetFormat(), "CopyFromTexture between different formats."); + // depth textures need to copy the whole thing.. 
if (m_format == GSTexture::Format::DepthStencil) { diff --git a/pcsx2/GS/Renderers/DX11/GSTexture11.h b/pcsx2/GS/Renderers/DX11/GSTexture11.h index 37ef01c4b0..5d6a03adc4 100644 --- a/pcsx2/GS/Renderers/DX11/GSTexture11.h +++ b/pcsx2/GS/Renderers/DX11/GSTexture11.h @@ -23,7 +23,7 @@ public: explicit GSTexture11(wil::com_ptr_nothrow texture, const D3D11_TEXTURE2D_DESC& desc, GSTexture::Type type, GSTexture::Format format); - static DXGI_FORMAT GetDXGIFormat(Format format); + static DXGI_FORMAT GetDXGIFormat(Format format, Type type = Type::Invalid); void* GetNativeHandle() const override; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 8bd107b8a1..e7d5bf0550 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -47,6 +47,9 @@ static constexpr std::array s_primitive_topology_ma static constexpr std::array s_present_clear_color = {}; +constexpr DXGI_FORMAT swap_chain_hdr_format = DXGI_FORMAT_R16G16B16A16_FLOAT; // GSTexture::Format::ColorHDR. Automatically enables scRGB HDR if set on creation. +constexpr DXGI_FORMAT swap_chain_sdr_format = DXGI_FORMAT_R10G10B10A2_UNORM; // GSTexture::Format::ColorHQ. + static D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE GetLoadOpForTexture(GSTexture12* tex) { if (!tex) @@ -697,6 +700,10 @@ bool GSDevice12::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) m_name = D3D::GetAdapterName(m_adapter.get()); + m_swap_chain_format = EmuConfig.HDROutput ? 
swap_chain_hdr_format : swap_chain_sdr_format; + // For now these are expected to be identical, but it's probably not necessary + pxAssert(m_swap_chain_format == GetNativeFormat(m_postprocess_texture_format)); + if (!CreateDescriptorHeaps() || !CreateCommandLists() || !CreateTimestampQuery()) return false; @@ -800,8 +807,6 @@ u32 GSDevice12::GetSwapChainBufferCount() const bool GSDevice12::CreateSwapChain() { - constexpr DXGI_FORMAT swap_chain_format = DXGI_FORMAT_R8G8B8A8_UNORM; - if (m_window_info.type != WindowInfo::Type::Win32) return false; @@ -818,7 +823,7 @@ bool GSDevice12::CreateSwapChain() m_is_exclusive_fullscreen = GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) && D3D::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.get(), client_rc, fullscreen_width, - fullscreen_height, fullscreen_refresh_rate, swap_chain_format, &fullscreen_mode, + fullscreen_height, fullscreen_refresh_rate, m_swap_chain_format, &fullscreen_mode, fullscreen_output.put()); // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen. 
@@ -836,7 +841,7 @@ bool GSDevice12::CreateSwapChain() DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; swap_chain_desc.Width = static_cast(client_rc.right - client_rc.left); swap_chain_desc.Height = static_cast(client_rc.bottom - client_rc.top); - swap_chain_desc.Format = swap_chain_format; + swap_chain_desc.Format = m_swap_chain_format; swap_chain_desc.SampleDesc.Count = 1; swap_chain_desc.BufferCount = GetSwapChainBufferCount(); swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; @@ -933,8 +938,9 @@ bool GSDevice12::CreateSwapChainRTV() return false; } + pxAssert(swap_chain_desc.BufferDesc.Format == GetNativeFormat(m_postprocess_texture_format)); std::unique_ptr tex = GSTexture12::Adopt(std::move(backbuffer), GSTexture::Type::RenderTarget, - GSTexture::Format::Color, swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, 1, + m_postprocess_texture_format, swap_chain_desc.BufferDesc.Width, swap_chain_desc.BufferDesc.Height, 1, swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, swap_chain_desc.BufferDesc.Format, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_STATE_COMMON); if (!tex) @@ -1265,13 +1271,21 @@ void GSDevice12::DrawIndexedPrimitive(int offset, int count) } void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_format, DXGI_FORMAT* srv_format, - DXGI_FORMAT* rtv_format, DXGI_FORMAT* dsv_format) const + DXGI_FORMAT* rtv_format, DXGI_FORMAT* dsv_format, GSTexture::Type type) const { +#if OLD_HDR + static constexpr std::array, static_cast(GSTexture::Format::Last) + 2> +#else static constexpr std::array, static_cast(GSTexture::Format::Last) + 1> +#endif s_format_mapping = {{ {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // Invalid {DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN}, // Color +#if OLD_HDR + {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, + 
DXGI_FORMAT_UNKNOWN}, // Color (upgraded to HDR) +#endif {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN}, // ColorHQ {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, @@ -1290,6 +1304,13 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f {DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN}, // BC7 }}; +#if OLD_HDR + if ((EmuConfig.HDRRendering && (type == GSTexture::Type::RenderTarget || type == GSTexture::Type::RWTexture)) ? format >= GSTexture::Format::Color : format > GSTexture::Format::Color) + { + format = (GSTexture::Format)((u8)format + 1); + } +#endif + const auto& mapping = s_format_mapping[static_cast(format)]; if (d3d_format) *d3d_format = mapping[0]; @@ -1301,10 +1322,17 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f *dsv_format = mapping[3]; } +DXGI_FORMAT GSDevice12::GetNativeFormat(GSTexture::Format format, GSTexture::Type type) const +{ + DXGI_FORMAT d3d_format, srv_format, rtv_format, dsv_format; + LookupNativeFormat(format, &d3d_format, &srv_format, &rtv_format, &dsv_format, type); + return d3d_format; +} + GSTexture* GSDevice12::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) { DXGI_FORMAT dxgi_format, srv_format, rtv_format, dsv_format; - LookupNativeFormat(format, &dxgi_format, &srv_format, &rtv_format, &dsv_format); + LookupNativeFormat(format, &dxgi_format, &srv_format, &rtv_format, &dsv_format, type); const DXGI_FORMAT uav_format = (type == GSTexture::Type::RWTexture) ? 
dxgi_format : DXGI_FORMAT_UNKNOWN; @@ -1419,8 +1447,28 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), int(dRect.right - dRect.left), int(dRect.bottom - dRect.top)); + if (shader == ShaderConvert::COPY && dTex && dTex->GetFormat() != m_emulation_hw_rt_texture_format) + { + if (dTex->GetFormat() == GSTexture::Format::Color) + { + shader = ShaderConvert::COPY_EMU_LQ; + } + else if (dTex->GetFormat() == m_postprocess_texture_format) + { + shader = ShaderConvert::COPY_POSTPROCESS; + } + else + { + pxAssertMsg(false, "Trying to use the ShaderConvert::COPY shader pipeline to target an unsupported RT format"); + } + } + else + { + pxAssertMsg(dTex, "StretchRect(): The destination texture needs to valid (it used to redirect to the presentation surface but it doesn't anymore)"); + } + DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, - dTex ? m_convert[static_cast(shader)].get() : m_present[static_cast(shader)].get(), linear, + m_convert[static_cast(shader)].get(), linear, ShaderConvertWriteMask(shader) == 0xf); } @@ -1429,6 +1477,8 @@ void GSDevice12::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* { GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha); + pxAssertMsg((shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION) && dTex && dTex->GetFormat() == m_emulation_hw_rt_texture_format, "Trying to use the m_color_copy shader pipeline to target an unsupported RT format"); + const u32 index = (red ? 1 : 0) | (green ? 2 : 0) | (blue ? 4 : 0) | (alpha ? 8 : 0); int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; const bool allow_discard = (index == 0xf); @@ -1443,6 +1493,7 @@ void GSDevice12::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* cb.SetSource(sRect, sTex->GetSize()); cb.SetTarget(dRect, dTex ? 
dTex->GetSize() : GSVector2i(GetWindowWidth(), GetWindowHeight())); cb.SetTime(shaderTime); + cb.SetBrightness(EmuConfig.HDROutput ? (GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f); SetUtilityRootSignature(); SetUtilityPushConstants(&cb, sizeof(cb)); @@ -1508,6 +1559,8 @@ void GSDevice12::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 SetUtilityRootSignature(); SetUtilityPushConstants(&cb, sizeof(cb)); + pxAssert(dTex->GetFormat() == m_emulation_hw_rt_texture_format); // "ShaderConvert::DOWNSAMPLE_COPY" expects RTs of this format + //const GSVector4 dRect = GSVector4(dTex->GetRect()); const ShaderConvert shader = ShaderConvert::DOWNSAMPLE_COPY; DoStretchRect(static_cast(sTex), GSVector4::zero(), static_cast(dTex), dRect, @@ -1624,6 +1677,7 @@ void GSDevice12::DoMultiStretchRects( SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu); pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); + pxAssert(shader != ShaderConvert::COPY || dTex->GetFormat() == m_emulation_hw_rt_texture_format); int rta_bit = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; SetPipeline((rects[0].wmask.wrgba != 0xf) ? 
m_color_copy[rects[0].wmask.wrgba | rta_bit].get() : m_convert[static_cast(shader)].get()); @@ -1757,6 +1811,8 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, SetUtilityPushConstants(&uniforms, sizeof(uniforms)); } + pxAssert(dTex->GetFormat() == m_postprocess_texture_format); // "ShaderConvert::COPY_POSTPROCESS" expects RTs of this format + const GSVector2i dsize(dTex->GetSize()); const GSVector4i darea(0, 0, dsize.x, dsize.y); bool dcleared = false; @@ -1771,7 +1827,7 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, GSVector4::unorm8(c)); SetUtilityRootSignature(); - SetPipeline(m_convert[static_cast(ShaderConvert::COPY)].get()); + SetPipeline(m_convert[static_cast(ShaderConvert::COPY_POSTPROCESS)].get()); DrawStretchRect(sRect[1], PMODE.SLBG ? dRect[2] : dRect[1], dsize); dTex->SetState(GSTexture::State::Dirty); dcleared = true; @@ -1833,6 +1889,7 @@ void GSDevice12::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, if (feedback_write_1) // FIXME I'm not sure dRect[0] is always correct { + pxAssert(sTex[2]->GetFormat() == m_emulation_hw_rt_texture_format); // "ShaderConvert::YUV" expects RTs of this format EndRenderPass(); SetUtilityRootSignature(); SetPipeline(m_convert[static_cast(ShaderConvert::YUV)].get()); @@ -1872,7 +1929,7 @@ void GSDevice12::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } -void GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) +void GSDevice12::DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) { const GSVector4 sRect = GSVector4(0.0f, 0.0f, 1.0f, 1.0f); const GSVector4i dRect = dTex->GetRect(); @@ -1883,8 +1940,8 @@ void 
GSDevice12::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float para BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS); dTex->SetState(GSTexture::State::Dirty); - SetPipeline(m_shadeboost_pipeline.get()); - SetUtilityPushConstants(params, sizeof(float) * 4); + SetPipeline(m_colorcorrect_pipeline.get()); + SetUtilityPushConstants(&cb, sizeof(cb)); DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize()); EndRenderPass(); @@ -1954,8 +2011,11 @@ bool GSDevice12::CompileImGuiPipeline() return false; } + ShaderMacro sm; + sm.AddMacro("PS_HDR", EmuConfig.HDROutput ? "1" : "0"); + const ComPtr vs = m_shader_cache.GetVertexShader(hlsl.value(), nullptr, "vs_main"); - const ComPtr ps = m_shader_cache.GetPixelShader(hlsl.value(), nullptr, "ps_main"); + const ComPtr ps = m_shader_cache.GetPixelShader(hlsl.value(), sm.GetPtr(), "ps_main"); if (!vs || !ps) { Console.Error("D3D12: Failed to compile ImGui shaders"); @@ -1974,7 +2034,7 @@ bool GSDevice12::CompileImGuiPipeline() gpb.SetNoDepthTestState(); gpb.SetBlendState(0, true, D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD); - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, m_swap_chain_format); m_imgui_pipeline = gpb.Create(m_device.get(), m_shader_cache, false); if (!m_imgui_pipeline) @@ -2009,8 +2069,16 @@ void GSDevice12::RenderImGui() }; // clang-format on + float cb[(4 * 4) + 4]; + std::memcpy(&cb, &ortho_projection, sizeof(ortho_projection)); + + // Imgui currently follows the same brightness as the whole HDR image (applied earlier on presentation), + // we could expose this variable to users to make it brightness + float imgui_hdr_brightness_nits = GSConfig.HDR_BrightnessNits; + cb[4 * 4] = EmuConfig.HDROutput ? 
(imgui_hdr_brightness_nits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f; + SetUtilityRootSignature(); - SetUtilityPushConstants(ortho_projection, sizeof(ortho_projection)); + SetUtilityPushConstants(cb, sizeof(cb)); SetPipeline(m_imgui_pipeline.get()); SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); @@ -2333,6 +2401,11 @@ GSDevice12::ComPtr GSDevice12::GetUtilityPixelShader(const std::string return m_shader_cache.GetPixelShader(source, sm_model.GetPtr(), entry_point); } +GSDevice12::ComPtr GSDevice12::GetUtilityPixelShader(const std::string& source, const char* entry_point, ShaderMacro& shader_macro) +{ + return m_shader_cache.GetPixelShader(source, shader_macro.GetPtr(), entry_point); +} + bool GSDevice12::CreateNullTexture() { m_null_texture = @@ -2439,6 +2512,9 @@ bool GSDevice12::CompileConvertPipelines() gpb.SetNoBlendingState(); gpb.SetVertexShader(m_convert_vs.get()); + ShaderMacro sm; + sm.AddMacro("PS_HDR", EmuConfig.HDRRendering ? "1" : "0"); + for (ShaderConvert i = ShaderConvert::COPY; static_cast(i) < static_cast(ShaderConvert::Count); i = static_cast(static_cast(i) + 1)) { @@ -2469,9 +2545,21 @@ bool GSDevice12::CompileConvertPipelines() gpb.SetDepthStencilFormat(DXGI_FORMAT_D32_FLOAT_S8X24_UINT); } break; + case ShaderConvert::COPY_EMU_LQ: + { + gpb.SetRenderTarget(0, GetNativeFormat(GSTexture::Format::Color, GSTexture::Type::RenderTarget)); + gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); + } + break; + case ShaderConvert::COPY_POSTPROCESS: + { + gpb.SetRenderTarget(0, GetNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget)); + gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); + } + break; default: { - depth ? gpb.ClearRenderTargets() : gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + depth ? gpb.ClearRenderTargets() : gpb.SetRenderTarget(0, GetNativeFormat(m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget)); gpb.SetDepthStencilFormat(depth ? 
DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN); } break; @@ -2492,7 +2580,7 @@ bool GSDevice12::CompileConvertPipelines() gpb.SetColorWriteMask(0, ShaderConvertWriteMask(i)); - ComPtr ps(GetUtilityPixelShader(*shader, shaderName(i))); + ComPtr ps(GetUtilityPixelShader(*shader, shaderName(i), sm)); if (!ps) return false; @@ -2507,7 +2595,7 @@ bool GSDevice12::CompileConvertPipelines() if (i == ShaderConvert::COPY) { // compile color copy pipelines - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, GetNativeFormat(m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget)); gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); for (u32 j = 0; j < 16; j++) { @@ -2524,8 +2612,7 @@ bool GSDevice12::CompileConvertPipelines() } else if (i == ShaderConvert::RTA_CORRECTION) { - // compile color copy pipelines - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, GetNativeFormat(m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget)); gpb.SetDepthStencilFormat(DXGI_FORMAT_UNKNOWN); for (u32 j = 16; j < 32; j++) { @@ -2548,7 +2635,7 @@ bool GSDevice12::CompileConvertPipelines() { pxAssert(!arr[ds]); - gpb.SetRenderTarget(0, is_setup ? DXGI_FORMAT_R16G16B16A16_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, is_setup ? GetNativeFormat(GSTexture::Format::ColorClip, GSTexture::Type::RenderTarget) : GetNativeFormat(m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget)); gpb.SetDepthStencilFormat(ds ? 
DXGI_FORMAT_D32_FLOAT_S8X24_UINT : DXGI_FORMAT_UNKNOWN); arr[ds] = gpb.Create(m_device.get(), m_shader_cache, false); if (!arr[ds]) @@ -2562,7 +2649,7 @@ bool GSDevice12::CompileConvertPipelines() for (u32 datm = 0; datm < 4; datm++) { const std::string entry_point(StringUtil::StdStringFromFormat("ps_stencil_image_init_%d", datm)); - ComPtr ps(GetUtilityPixelShader(*shader, entry_point.c_str())); + ComPtr ps(GetUtilityPixelShader(*shader, entry_point.c_str(), sm)); if (!ps) return false; @@ -2610,14 +2697,16 @@ bool GSDevice12::CompilePresentPipelines() gpb.SetVertexShader(vs.get()); gpb.SetDepthState(false, false, D3D12_COMPARISON_FUNC_ALWAYS); gpb.SetNoStencilState(); - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, m_swap_chain_format); for (PresentShader i = PresentShader::COPY; static_cast(i) < static_cast(PresentShader::Count); i = static_cast(static_cast(i) + 1)) { const int index = static_cast(i); - ComPtr ps(GetUtilityPixelShader(*shader, shaderName(i))); + ShaderMacro sm; + sm.AddMacro("PS_HDR", EmuConfig.HDROutput ? 
"1" : "0"); + ComPtr ps(GetUtilityPixelShader(*shader, shaderName(i), sm)); if (!ps) return false; @@ -2648,7 +2737,7 @@ bool GSDevice12::CompileInterlacePipelines() gpb.SetNoCullRasterizationState(); gpb.SetNoDepthTestState(); gpb.SetNoBlendingState(); - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, GetNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget)); gpb.SetVertexShader(m_convert_vs.get()); for (int i = 0; i < static_cast(m_interlace.size()); i++) @@ -2683,7 +2772,7 @@ bool GSDevice12::CompileMergePipelines() gpb.SetRootSignature(m_utility_root_signature.get()); gpb.SetNoCullRasterizationState(); gpb.SetNoDepthTestState(); - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, GetNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget)); gpb.SetVertexShader(m_convert_vs.get()); for (int i = 0; i < static_cast(m_merge.size()); i++) @@ -2714,7 +2803,7 @@ bool GSDevice12::CompilePostProcessingPipelines() gpb.SetNoCullRasterizationState(); gpb.SetNoDepthTestState(); gpb.SetNoBlendingState(); - gpb.SetRenderTarget(0, DXGI_FORMAT_R8G8B8A8_UNORM); + gpb.SetRenderTarget(0, GetNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget)); gpb.SetVertexShader(m_convert_vs.get()); { @@ -2727,6 +2816,7 @@ bool GSDevice12::CompilePostProcessingPipelines() ShaderMacro sm; sm.AddMacro("FXAA_HLSL", "1"); + sm.AddMacro("PS_HDR", EmuConfig.HDROutput ? 
"1" : "0"); ComPtr ps = m_shader_cache.GetPixelShader(*shader, sm.GetPtr()); if (!ps) return false; @@ -2741,24 +2831,27 @@ bool GSDevice12::CompilePostProcessingPipelines() } { - const std::optional shader = ReadShaderSource("shaders/dx11/shadeboost.fx"); + const std::optional shader = ReadShaderSource("shaders/dx11/colorcorrect.fx"); if (!shader) { - Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/shadeboost.fx."); + Host::ReportErrorAsync("GS", "Failed to read shaders/dx11/colorcorrect.fx."); return false; } - ComPtr ps(GetUtilityPixelShader(*shader, "ps_main")); + ShaderMacro sm; + sm.AddMacro("PS_HDR_INPUT", EmuConfig.HDRRendering ? "1" : "0"); + sm.AddMacro("PS_HDR_OUTPUT", EmuConfig.HDROutput ? "1" : "0"); + ComPtr ps(GetUtilityPixelShader(*shader, "ps_main", sm)); if (!ps) return false; gpb.SetPixelShader(ps.get()); - m_shadeboost_pipeline = gpb.Create(m_device.get(), m_shader_cache, false); - if (!m_shadeboost_pipeline) + m_colorcorrect_pipeline = gpb.Create(m_device.get(), m_shader_cache, false); + if (!m_colorcorrect_pipeline) return false; - D3D12::SetObjectName(m_shadeboost_pipeline.get(), "Shadeboost pipeline"); + D3D12::SetObjectName(m_colorcorrect_pipeline.get(), "ColorCorrect pipeline"); } return true; @@ -2784,7 +2877,7 @@ void GSDevice12::DestroyResources() m_colclip_finish_pipelines = {}; m_date_image_setup_pipelines = {}; m_fxaa_pipeline.reset(); - m_shadeboost_pipeline.reset(); + m_colorcorrect_pipeline.reset(); m_imgui_pipeline.reset(); for (const auto& it : m_samplers) @@ -2918,6 +3011,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb); sm.AddMacro("PS_NO_COLOR", sel.no_color); sm.AddMacro("PS_NO_COLOR1", sel.no_color1); + sm.AddMacro("PS_HDR", EmuConfig.HDRRendering); ComPtr ps(m_shader_cache.GetPixelShader(m_tfx_source, sm.GetPtr(), "ps_main")); it = m_tfx_pixel_shaders.emplace(sel, std::move(ps)).first; @@ -2955,10 +3049,10 @@ GSDevice12::ComPtr 
GSDevice12::CreateTFXPipeline(const Pipe { const GSTexture::Format format = IsDATEModePrimIDInit(p.ps.date) ? GSTexture::Format::PrimID : - (p.ps.colclip_hw ? GSTexture::Format::ColorClip : GSTexture::Format::Color); + (p.ps.colclip_hw ? GSTexture::Format::ColorClip : m_emulation_hw_rt_texture_format); DXGI_FORMAT native_format; - LookupNativeFormat(format, nullptr, nullptr, &native_format, nullptr); + LookupNativeFormat(format, nullptr, nullptr, &native_format, nullptr, GSTexture::Type::RenderTarget); gpb.SetRenderTarget(0, native_format); } if (p.ds) @@ -3386,6 +3480,7 @@ void GSDevice12::RenderTextureMipmap( cmdlist->RSSetScissorRects(1, &scissor); SetUtilityRootSignature(); + pxAssert(texture->GetFormat() == m_emulation_hw_rt_texture_format); // "ShaderConvert::COPY" expects RTs of this format SetPipeline(m_convert[static_cast(ShaderConvert::COPY)].get()); DrawStretchRect(GSVector4(0.0f, 0.0f, 1.0f, 1.0f), GSVector4(0.0f, 0.0f, static_cast(dst_width), static_cast(dst_height)), @@ -3432,7 +3527,7 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b if (color_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) { LookupNativeFormat(m_current_render_target->GetFormat(), nullptr, - &rt.BeginningAccess.Clear.ClearValue.Format, nullptr, nullptr); + &rt.BeginningAccess.Clear.ClearValue.Format, nullptr, nullptr, m_current_render_target->GetType()); GSVector4::store(rt.BeginningAccess.Clear.ClearValue.Color, clear_color); } } @@ -3446,7 +3541,7 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b if (depth_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) { LookupNativeFormat(m_current_depth_target->GetFormat(), nullptr, nullptr, nullptr, - &ds.DepthBeginningAccess.Clear.ClearValue.Format); + &ds.DepthBeginningAccess.Clear.ClearValue.Format, m_current_depth_target->GetType()); ds.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = clear_depth; } ds.StencilEndingAccess.Type = stencil_end; @@ 
-3454,7 +3549,7 @@ void GSDevice12::BeginRenderPass(D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_b if (stencil_begin == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR) { LookupNativeFormat(m_current_depth_target->GetFormat(), nullptr, nullptr, nullptr, - &ds.StencilBeginningAccess.Clear.ClearValue.Format); + &ds.StencilBeginningAccess.Clear.ClearValue.Format, m_current_depth_target->GetType()); ds.StencilBeginningAccess.Clear.ClearValue.DepthStencil.Stencil = clear_stencil; } } @@ -3903,7 +3998,15 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config) if (config.require_one_barrier || (config.tex && config.tex == config.rt)) // Used as "bind rt" flag when texture barrier is unsupported. { // requires a copy of the RT - draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true)); + pxAssert(draw_rt->GetFormat() == (colclip_rt ? GSTexture::Format::ColorClip : m_emulation_hw_rt_texture_format)); //TODO: delete this here and in OGL + VK +#if OLD_HDR //TODO: do in VK too + if (EmuConfig.HDRRendering && draw_rt->GetFormat() == GSTexture::Format::Color && (draw_rt->GetType() == GSTexture::Type::RenderTarget || draw_rt->GetType() == GSTexture::Type::RWTexture)) + draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, GSTexture::Format::ColorHDR, true)); + else + draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true)); +#else + draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true)); +#endif if (draw_rt_clone) { EndRenderPass(); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index 042b79d171..ef6efe49b5 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -179,6 +179,8 @@ private: D3D_FEATURE_LEVEL m_feature_level = D3D_FEATURE_LEVEL_11_0; + DXGI_FORMAT m_swap_chain_format = DXGI_FORMAT_UNKNOWN; + public: struct alignas(8) PipelineSelector { @@ 
-260,6 +262,7 @@ public: NUM_TFX_SAMPLERS = 1, NUM_UTILITY_TEXTURES = 1, NUM_UTILITY_SAMPLERS = 1, + // This needs to match the sum of all the utility cbuffer sizes //TODO: max not sum. VK too CONVERT_PUSH_CONSTANTS_SIZE = 96, VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, @@ -319,7 +322,7 @@ private: std::array, 2> m_colclip_finish_pipelines{}; // [depth] std::array, 4>, 2> m_date_image_setup_pipelines{}; // [depth][datm] ComPtr m_fxaa_pipeline; - ComPtr m_shadeboost_pipeline; + ComPtr m_colorcorrect_pipeline; ComPtr m_imgui_pipeline; std::unordered_map> m_tfx_vertex_shaders; @@ -339,7 +342,8 @@ private: std::string m_tfx_source; void LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_format, DXGI_FORMAT* srv_format, - DXGI_FORMAT* rtv_format, DXGI_FORMAT* dsv_format) const; + DXGI_FORMAT* rtv_format, DXGI_FORMAT* dsv_format, GSTexture::Type type = GSTexture::Type::Invalid) const; + DXGI_FORMAT GetNativeFormat(GSTexture::Format format, GSTexture::Type type = GSTexture::Type::Invalid) const; u32 GetSwapChainBufferCount() const; bool CreateSwapChain(); @@ -354,7 +358,7 @@ private: const GSRegEXTBUF& EXTBUF, u32 c, const bool linear) final; void DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb) final; - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; + void DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; bool DoCAS( @@ -372,6 +376,7 @@ private: ComPtr GetUtilityVertexShader(const std::string& source, const char* entry_point); ComPtr GetUtilityPixelShader(const std::string& source, const char* entry_point); + ComPtr GetUtilityPixelShader(const std::string& source, const char* entry_point, ShaderMacro& shader_macro); bool CheckFeatures(const u32& vendor_id); bool CreateNullTexture(); @@ -499,9 +504,6 @@ public: void 
RenderTextureMipmap(GSTexture12* texture, u32 dst_level, u32 dst_width, u32 dst_height, u32 src_level, u32 src_width, u32 src_height); - // Ends a render pass if we're currently in one. - // When Bind() is next called, the pass will be restarted. - // Calling this function is allowed even if a pass has not begun. bool InRenderPass(); void BeginRenderPass( D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE color_begin = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, @@ -511,6 +513,9 @@ public: D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE stencil_begin = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_NO_ACCESS, D3D12_RENDER_PASS_ENDING_ACCESS_TYPE stencil_end = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_NO_ACCESS, GSVector4 clear_color = GSVector4::zero(), float clear_depth = 0.0f, u8 clear_stencil = 0); + // Ends a render pass if we're currently in one. + // When Bind() is next called, the pass will be restarted. + // Calling this function is allowed even if a pass has not begun. void EndRenderPass(); void SetViewport(const D3D12_VIEWPORT& viewport); diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp index d37281ad48..6576717f44 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp @@ -118,6 +118,23 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w D3D12_CLEAR_VALUE optimized_clear_value = {}; D3D12_RESOURCE_STATES state; +#if OLD_HDR // Add RT to allow textures of different formats to be copied in it + auto og_type = type; + if (format == GSTexture::Format::Color) + { + switch (type) + { + case GSTexture::Type::Texture: + case GSTexture::Type::RWTexture: + type = Type::RenderTarget; + } + if (rtv_format == DXGI_FORMAT_UNKNOWN) + { + rtv_format = dxgi_format; + } + } +#endif + switch (type) { case Type::Texture: @@ -223,6 +240,10 @@ std::unique_ptr GSTexture12::Create(Type type, Format format, int w return {}; } +#if OLD_HDR + type = og_type; +#endif + return std::unique_ptr( new 
GSTexture12(type, format, width, height, levels, dxgi_format, std::move(resource), std::move(allocation), srv_descriptor, write_descriptor, uav_descriptor, write_descriptor_type, state)); diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 8a703e5193..7392840353 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1104,7 +1104,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, rt_again->m_valid.y /= 2; rt_again->m_valid.w /= 2; rt_again->m_TEX0.PSM = PSMCT32; - GSTexture* tex = g_gs_device->CreateRenderTarget(rt_again->m_unscaled_size.x * rt_again->m_scale, rt_again->m_unscaled_size.y * rt_again->m_scale, GSTexture::Format::Color, false); + GSTexture* tex = g_gs_device->CreateRenderTarget(rt_again->m_unscaled_size.x * rt_again->m_scale, rt_again->m_unscaled_size.y * rt_again->m_scale, g_gs_device->GetEmuHWRTTexFormat(), false); if (!tex) return false; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 7401a2bb29..c1a2e95de8 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5751,6 +5751,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo blend.op = GSDevice::OP_ADD; // Render pass 2: Add or subtract result of render pass 1(Cd) from Cs. m_conf.blend_multi_pass.enable = true; + pxAssert(dither == 0 || dither == 1); // It will overflow below if it's any other value m_conf.blend_multi_pass.dither = dither * GSConfig.Dithering; m_conf.blend_multi_pass.blend = {true, blend_multi_pass.src, GSDevice::CONST_ONE, blend_multi_pass.op, GSDevice::CONST_ONE, GSDevice::CONST_ZERO, false, 0}; } @@ -6513,6 +6514,26 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c g_gs_device->FilteredDownsampleTexture(src_target->m_texture, src_copy.get(), downsample_factor, clamp_min, dRect); } } +#if 1 //TODO! 
+#if !OLD_HDR + else if (src_target->m_texture->GetFormat() != src_copy->GetFormat()) +#else + else if ((EmuConfig.HDRRendering && src_target->m_texture->GetFormat() == GSTexture::Format::Color && src_copy->GetFormat() == GSTexture::Format::Color + && ((src_target->m_texture->GetType() == GSTexture::Type::RenderTarget || src_target->m_texture->GetType() == GSTexture::Type::RWTexture) + != (src_copy->GetType() == GSTexture::Type::RenderTarget || src_copy->GetType() == GSTexture::Type::RWTexture))) + || src_target->m_texture->GetFormat() != src_copy->GetFormat()) +#endif + { + pxAssertMsg(src_target->m_texture->GetFormat() == GSTexture::Format::Color || src_copy->GetFormat() == GSTexture::Format::Color, "CopyFromTexture unsupported format."); +#if !OLD_HDR + pxAssertMsg(src_copy->GetType() == GSTexture::Type::RenderTarget, "CopyFromTexture unsupported type."); +#endif + + const GSVector4 src_rect = GSVector4(scaled_copy_range) / GSVector4(src_target->m_texture->GetSize()).xyxy(); + const GSVector4 dst_rect = GSVector4((float)scaled_copy_dst_offset.x, (float)scaled_copy_dst_offset.y, scaled_copy_dst_offset.x + (scaled_copy_range.width() * ((float)src_copy->GetSize().x / (float)src_target->m_texture->GetSize().x)), scaled_copy_dst_offset.y + (scaled_copy_range.height() * ((float)src_copy->GetSize().y / (float)src_target->m_texture->GetSize().y))); + g_gs_device->StretchRect(src_target->m_texture, src_rect, src_copy.get(), dst_rect, ShaderConvert::COPY, true); //TODO: linear or nearest? + } +#endif else { g_gs_device->CopyRect( @@ -8438,7 +8459,8 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc r_texture.y -= offset; r_texture.w -= offset; - if (GSTexture* rt = g_gs_device->CreateRenderTarget(tw, th, GSTexture::Format::Color)) + pxAssert(tex->m_texture->GetFormat() == g_gs_device->GetEmuHWRTTexFormat()); // We don't know what happens if this was the case, probably nothing! //TODO: delete!? 
All RTs should use the same format due to VK/DX12 limitations. + if (GSTexture* rt = g_gs_device->CreateRenderTarget(tw, th, tex->m_texture->GetFormat())) { // sRect is the top of texture // Need to half pixel offset the dest tex coordinates as draw pixels are top left instead of centre for texel reads. diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 88815d4200..239e417933 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -2356,7 +2356,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (dst->m_scale != scale && (!preserve_scale || is_shuffle || !dst->m_downscaled || TEX0.TBW != dst->m_TEX0.TBW)) { calcRescale(dst); - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) : + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, g_gs_device->GetEmuHWRTTexFormat(), clear) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear); if (!tex) return nullptr; @@ -2456,7 +2456,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect), true); else { - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) : + GSTexture* tex = type == RenderTarget ? 
g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, g_gs_device->GetEmuHWRTTexFormat(), clear) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear); if (!tex) return nullptr; @@ -2477,7 +2477,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->ResizeTexture(new_size.x, new_size.y, true, true, GSVector4i(dRect)); else { - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, clear) : + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, g_gs_device->GetEmuHWRTTexFormat(), clear) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, clear); if (!tex) return nullptr; @@ -2561,7 +2561,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // So, create a new target, clear/preload it, and copy RGB in. GSTexture* tex = (type == RenderTarget) ? g_gs_device->CreateRenderTarget(dst->m_texture->GetWidth(), - dst->m_texture->GetHeight(), GSTexture::Format::Color, true) : + dst->m_texture->GetHeight(), g_gs_device->GetEmuHWRTTexFormat(), true) : g_gs_device->CreateDepthStencil(dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), GSTexture::Format::DepthStencil, true); if (!tex) @@ -3246,9 +3246,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons t->m_valid.w -= height_adjust; t->ResizeValidity(t->m_valid); + //TODO: delete? + pxAssert(!t->m_texture->IsRenderTarget() || t->m_texture->GetFormat() == g_gs_device->GetEmuHWRTTexFormat()); + GSTexture* tex = (type == RenderTarget) ? 
g_gs_device->CreateRenderTarget(t->m_texture->GetWidth(), - t->m_texture->GetHeight(), GSTexture::Format::Color, true) : + t->m_texture->GetHeight(), g_gs_device->GetEmuHWRTTexFormat(), true) : g_gs_device->CreateDepthStencil(t->m_texture->GetWidth(), t->m_texture->GetHeight(), GSTexture::Format::DepthStencil, true); if (tex) @@ -3390,7 +3393,7 @@ void GSTextureCache::Target::ScaleRTAlpha() const GSVector4i valid_rect = GSVector4i(GSVector4(m_valid) * GSVector4(m_scale)); GL_PUSH("ScaleRTAlpha(valid=(%dx%d %d,%d=>%d,%d))", m_valid.width(), m_valid.height(), m_valid.x, m_valid.y, m_valid.z, m_valid.w); - if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect))) + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, g_gs_device->GetEmuHWRTTexFormat(), !GSVector4i::loadh(rtsize).eq(valid_rect))) { // Only copy up the valid area, since there's no point in "correcting" nothing. const GSVector4 dRect(m_texture->GetRect().rintersect(valid_rect)); @@ -3416,7 +3419,7 @@ void GSTextureCache::Target::UnscaleRTAlpha() const GSVector4i valid_rect = GSVector4i(GSVector4(m_valid) * GSVector4(m_scale)); GL_PUSH("UnscaleRTAlpha(valid=(%dx%d %d,%d=>%d,%d))", valid_rect.width(), valid_rect.height(), valid_rect.x, valid_rect.y, valid_rect.z, valid_rect.w); - if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, !GSVector4i::loadh(rtsize).eq(valid_rect))) + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, g_gs_device->GetEmuHWRTTexFormat(), !GSVector4i::loadh(rtsize).eq(valid_rect))) { // Only copy up the valid area, since there's no point in "correcting" nothing. 
const GSVector4 dRect(m_texture->GetRect().rintersect(valid_rect)); @@ -3470,12 +3473,13 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, const int new_width = std::max(t->m_unscaled_size.x, needed_width); const int scaled_new_height = static_cast(std::ceil(static_cast(new_height) * scale)); const int scaled_new_width = static_cast(std::ceil(static_cast(new_width) * scale)); - GSTexture* new_texture = g_gs_device->CreateRenderTarget(scaled_new_width, scaled_new_height, GSTexture::Format::Color, false); + GSTexture* new_texture = g_gs_device->CreateRenderTarget(scaled_new_width, scaled_new_height, g_gs_device->GetEmuHWRTTexFormat(), false); if (!new_texture) { // Memory allocation failure, do our best to hobble along. return; } + pxAssert(new_texture->GetFormat() == old_texture->GetFormat() && new_texture->GetType() == old_texture->GetType()); //TODO: delete? Was this expected? Would it work anyway? Probably! It does stretch rect below! GL_CACHE("Expanding target for display output, target height %d @ 0x%X, display %d @ 0x%X offset %d needed %d", t->m_unscaled_size.y, t->m_TEX0.TBP0, real_h, dispfb.TBP0, y_offset, needed_height); @@ -3569,7 +3573,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src) GSTexture* tex = dst->m_texture; if (needs_new_tex) { - tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, + tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, g_gs_device->GetEmuHWRTTexFormat(), new_size != dst->m_unscaled_size || new_size != depth_src->m_unscaled_size); if (!tex) return false; @@ -3637,7 +3641,7 @@ bool GSTextureCache::CopyRGBFromDepthToColor(Target* dst, Target* depth_src) bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr* tex) { GSDownloadTexture* ctex = tex->get(); - if (ctex && ctex->GetWidth() >= width && ctex->GetHeight() >= height) + if (ctex && 
ctex->GetWidth() >= width && ctex->GetHeight() >= height && ctex->GetFormat() == format) return true; // In the case of oddly sized texture reads, we'll keep the larger dimension. @@ -4545,6 +4549,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // DirectX also can't copy to the same texture it's reading from (except potentially with enhanced barriers). if (SBP == DBP && (!(GSVector4i(sx, sy, sx + w, sy + h).rintersect(GSVector4i(dx, dy, dx + w, dy + h))).rempty() || renderer_is_directx)) { + pxAssert(src->m_texture->IsDepthStencil() || src->m_texture->IsRenderTarget()); //TODO: delete... making sure the source texture was already an RT, otherwise we might need to upgrade the format to HDR! GSTexture* tmp_texture = src->m_texture->IsDepthStencil() ? g_gs_device->CreateDepthStencil(src->m_texture->GetWidth(), src->m_texture->GetHeight(), src->m_texture->GetFormat(), false) : g_gs_device->CreateRenderTarget(src->m_texture->GetWidth(), src->m_texture->GetHeight(), src->m_texture->GetFormat(), false); @@ -4833,6 +4838,8 @@ void GSTextureCache::CopyPages(Target* src, u32 sbw, u32 src_offset, Target* dst rc.wmask.wrgba = 0xf; } + pxAssert(src->m_texture->GetFormat() != GSTexture::Format::ColorHDR); + // No need to sort here, it's all from the same texture. g_gs_device->DrawMultiStretchRects(rects, num_pages, dst->m_texture, shader); } @@ -5192,14 +5199,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con dst->Update(); // If we have a source larger than the target (from tex-in-rt), texelFetch() for target region will return black. + // This is for when a target is used as a source but the origin is offset. 
if constexpr (force_target_copy) { // If we have a source larger than the target, we need to clear it, otherwise we'll read junk const bool outside_target = ((x + w) > dst->m_texture->GetWidth() || (y + h) > dst->m_texture->GetHeight()); GSTexture* sTex = dst->m_texture; + pxAssert(sTex->GetFormat() == GSTexture::Format::Color || sTex->GetFormat() == GSTexture::Format::ColorHDR); // Other formats might work but are untested GSTexture* dTex = outside_target ? - g_gs_device->CreateRenderTarget(w, h, GSTexture::Format::Color, true, PreferReusedLabelledTexture()) : - g_gs_device->CreateTexture(w, h, tlevels, GSTexture::Format::Color, PreferReusedLabelledTexture()); + g_gs_device->CreateRenderTarget(w, h, g_gs_device->GetEmuHWRTTexFormat(), true, PreferReusedLabelledTexture()) : + g_gs_device->CreateTexture(w, h, tlevels, dst->m_texture->GetFormat(), PreferReusedLabelledTexture()); if (!dTex) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for offset source", w, h); @@ -5213,11 +5222,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); if (!area.rempty()) { - if (dst->m_rt_alpha_scale) +#if OLD_HDR + if (dst->m_rt_alpha_scale || dTex->GetFormat() != sTex->GetFormat() || dTex->GetType() != sTex->GetType()) +#else + if (dst->m_rt_alpha_scale || dTex->GetFormat() != sTex->GetFormat()) +#endif { const GSVector4 sRectF = GSVector4(area) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight()); g_gs_device->StretchRect( - sTex, sRectF, dTex, GSVector4(area), ShaderConvert::RTA_DECORRECTION, false); + sTex, sRectF, dTex, GSVector4(area), dst->m_rt_alpha_scale ? ShaderConvert::RTA_DECORRECTION : ShaderConvert::COPY, false); } else g_gs_device->CopyRect(sTex, dTex, area, 0, 0); @@ -5530,10 +5543,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Don't be fooled by the name. 
'dst' is the old target (hence the input) // 'src' is the new texture cache entry (hence the output) GSTexture* sTex = dst->m_texture; + pxAssert(sTex->GetFormat() == GSTexture::Format::Color || sTex->GetFormat() == GSTexture::Format::ColorHDR); // Other formats might work but are untested GSTexture* dTex = use_texture ? - g_gs_device->CreateTexture(new_size.x, new_size.y, 1, GSTexture::Format::Color, + g_gs_device->CreateTexture(new_size.x, new_size.y, 1, sTex->GetFormat(), PreferReusedLabelledTexture()) : - g_gs_device->CreateRenderTarget(new_size.x, new_size.y, GSTexture::Format::Color, + g_gs_device->CreateRenderTarget(new_size.x, new_size.y, g_gs_device->GetEmuHWRTTexFormat(), source_rect_empty || destX != 0 || destY != 0, PreferReusedLabelledTexture()); if (!dTex) [[unlikely]] { @@ -5549,11 +5563,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (use_texture) { - if (dst->m_rt_alpha_scale) + if (dst->m_rt_alpha_scale || dTex->GetFormat() != sTex->GetFormat()) { const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight()); g_gs_device->StretchRect( - sTex, sRectF, dTex, GSVector4(destX, destY, sRect.width(), sRect.height()), ShaderConvert::RTA_DECORRECTION, false); + sTex, sRectF, dTex, GSVector4(destX, destY, sRect.width(), sRect.height()), dst->m_rt_alpha_scale ? ShaderConvert::RTA_DECORRECTION : ShaderConvert::COPY, false); } else g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY); @@ -6015,7 +6029,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR lmtex->Unmap(); // Allocate our render target for drawing everything to. 
- GSTexture* dtex = g_gs_device->CreateRenderTarget(scaled_width, scaled_height, GSTexture::Format::Color, true); + GSTexture* dtex = g_gs_device->CreateRenderTarget(scaled_width, scaled_height, g_gs_device->GetEmuHWRTTexFormat(), true); if (!dtex) [[unlikely]] { Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); @@ -6160,7 +6174,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 const int tw = region.HasX() ? region.GetWidth() : (1 << TEX0.TW); const int th = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); const int tlevels = lod ? (GSConfig.HWMipmap ? std::min(lod->y - lod->x + 1, GSDevice::GetMipmapLevelsForSize(tw, th)) : -1) : 1; - GSTexture* tex = g_gs_device->CreateTexture(tw, th, tlevels, paltex ? GSTexture::Format::UNorm8 : GSTexture::Format::Color); + GSTexture* tex = g_gs_device->CreateTexture(tw, th, tlevels, paltex ? GSTexture::Format::UNorm8 : GSTexture::Format::Color); //TODO: HDR? Nah if (!tex) { // out of video memory if we hit here @@ -6275,7 +6289,7 @@ GSTextureCache::Target* GSTextureCache::Target::Create(GIFRegTEX0 TEX0, int w, i const int scaled_w = static_cast(std::ceil(static_cast(w) * scale)); const int scaled_h = static_cast(std::ceil(static_cast(h) * scale)); GSTexture* texture = (type == RenderTarget) ? 
- g_gs_device->CreateRenderTarget(scaled_w, scaled_h, GSTexture::Format::Color, clear, PreferReusedLabelledTexture()) : + g_gs_device->CreateRenderTarget(scaled_w, scaled_h, g_gs_device->GetEmuHWRTTexFormat(), clear, PreferReusedLabelledTexture()) : g_gs_device->CreateDepthStencil(scaled_w, scaled_h, GSTexture::Format::DepthStencil, clear, PreferReusedLabelledTexture()); if (!texture) return nullptr; @@ -6392,6 +6406,7 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r) } else { + // If the source was "HDR" (see "m_emulation_hw_texture_format"), we clip it for reading anyway fmt = GSTexture::Format::Color; if (t->m_rt_alpha_scale) ps_shader = ShaderConvert::RTA_DECORRECTION; @@ -6447,7 +6462,7 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r) const GSVector4 src(GSVector4(r) * GSVector4(t->m_scale) / GSVector4(t->m_texture->GetSize()).xyxy()); const GSVector4i drc(0, 0, r.width(), r.height()); - const bool direct_read = t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY; + const bool direct_read = t->m_type == RenderTarget && t->m_scale == 1.0f && ps_shader == ShaderConvert::COPY && t->m_texture->GetFormat() != GSTexture::Format::ColorHDR; if (!PrepareDownloadTexture(drc.z, drc.w, fmt, dltex)) return; @@ -6515,6 +6530,7 @@ void GSTextureCache::Read(Source* t, const GSVector4i& r) if (!PrepareDownloadTexture(drc.z, drc.w, GSTexture::Format::Color, &m_color_download_texture)) return; + pxAssertMsg(t->m_texture->GetFormat() == m_color_download_texture->GetFormat(), "GSTextureCache::Read between different formats."); m_color_download_texture->CopyFromTexture(drc, t->m_texture, r, 0, true); m_color_download_texture->Flush(); @@ -6805,6 +6821,8 @@ void GSTextureCache::Source::Flush(u32 count, int layer, const GSOffset& off) pitch = VectorAlign(pitch); + pxAssert(m_texture->GetFormat() != GSTexture::Format::ColorHQ && m_texture->GetFormat() != GSTexture::Format::ColorHDR); // This probably wouldn't work + for (u32 i = 
0; i < count; i++) { const GSVector4i r(m_write.rect[i]); @@ -6970,6 +6988,7 @@ void GSTextureCache::Target::Update(bool cannot_scale) const GSVector4 t_sizef(t_size.zwzw()); // This'll leave undefined data in pixels that we're not reading from... shouldn't hurt anything. + // This texture's data comes from SW emulation so it doesn't need to follow "m_emulation_hw_texture_format". GSTexture* const t = g_gs_device->CreateTexture(t_size.z, t_size.w, 1, GSTexture::Format::Color); if (!t) [[unlikely]] { @@ -7752,7 +7771,7 @@ void GSTextureCache::Palette::InitializeTexture() // sampling such texture are always normalized by 255. // This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255), // and therefore will read texel 15/255 * texture size). - m_tex_palette = g_gs_device->CreateTexture(m_pal, 1, 1, GSTexture::Format::Color); + m_tex_palette = g_gs_device->CreateTexture(m_pal, 1, 1, GSTexture::Format::Color); //TODO: HDR? Nah if (!m_tex_palette) [[unlikely]] { Console.Error("Failed to allocate %ux1 texture for palette", m_pal); @@ -8142,6 +8161,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE // If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer. 
const GSVector4i unoffset_rect(0, 0, tw, th); GSTexture::GSMap map; + pxAssert(tex->GetFormat() != GSTexture::Format::ColorHQ && tex->GetFormat() != GSTexture::Format::ColorHDR); if (rect.eq(block_rect) && !alpha_minmax && tex->Map(map, &unoffset_rect, level)) { rtx(mem, off, block_rect, map.bits, map.pitch, TEXA); diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacementLoaders.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacementLoaders.cpp index 212e02326f..bf47fb3ef9 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacementLoaders.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacementLoaders.cpp @@ -489,6 +489,13 @@ static bool ParseDDSHeader(std::FILE* fp, DDSLoadInfo* info) if (!features.dxt_textures) return false; } + //TODO! Uncompressed FP16. Match on the DXGI format only: testing the DX10 FourCC would also catch every other DX10-header file (e.g. BC7 below). + else if (dxt10_format == 10 /*DXGI_FORMAT_R16G16B16A16_FLOAT*/) + { + info->format = GSTexture::Format::ColorHDR; + info->block_size = 1; // Not compressed + info->bytes_per_block = 8; + } else if (dxt10_format == 98 /*DXGI_FORMAT_BC7_UNORM*/) { info->format = GSTexture::Format::BC7; diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp index a8d71bb151..54e7f8981d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp @@ -581,7 +581,7 @@ void GSTextureReplacements::SetReplacementTextureAlphaMinMax(ReplacementTexture& break; default: - pxAssert(rtex.format == GSTexture::Format::Color); + pxAssert(rtex.format == GSTexture::Format::Color); //TODO: FP16 support rtex.alpha_minmax = GSGetRGBA8AlphaMinMax(rtex.data.data(), rtex.width, rtex.height, rtex.pitch); break; } diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 43badee1f0..7c7ac3ae98 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1066,12 +1066,13 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool 
allow_present_throttle) auto ps_copy_rta_correct = LoadShader(@"ps_rta_correction"); auto pdesc = [[MTLRenderPipelineDescriptor new] autorelease]; // FS Triangle Pipelines - pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_emulation_hw_rt_texture_format); m_colclip_resolve_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_colclip_resolve"), @"ColorClip Resolve"); - m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa"); - m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost"); m_clut_pipeline[0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_4"), @"4-bit CLUT Update"); m_clut_pipeline[1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_8"), @"8-bit CLUT Update"); + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_postprocess_texture_format); + m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa"); + m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost"); pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::ColorClip); m_colclip_init_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_colclip_init"), @"ColorClip Init"); m_colclip_clear_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_clear"), @"ColorClip Clear"); @@ -1095,23 +1096,23 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) m_primid_init_pipeline[0][2] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_rta_init_datm0"), @"PrimID DATM0 RTA Clear"); m_primid_init_pipeline[0][3] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_rta_init_datm1"), @"PrimID DATM1 RTA Clear"); - pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); applyAttribute(pdesc.vertexDescriptor, 0, 
MTLVertexFormatFloat2, offsetof(ConvertShaderVertex, pos), 0); applyAttribute(pdesc.vertexDescriptor, 1, MTLVertexFormatFloat2, offsetof(ConvertShaderVertex, texpos), 0); pdesc.vertexDescriptor.layouts[0].stride = sizeof(ConvertShaderVertex); - + + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_postprocess_texture_format); for (size_t i = 0; i < std::size(m_interlace_pipeline); i++) { NSString* name = [NSString stringWithFormat:@"ps_interlace%zu", i]; m_interlace_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), name); } + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_emulation_hw_rt_texture_format); for (size_t i = 0; i < std::size(m_convert_pipeline); i++) { ShaderConvert conv = static_cast(i); NSString* name = [NSString stringWithCString:shaderName(conv) encoding:NSUTF8StringEncoding]; switch (conv) { - case ShaderConvert::Count: case ShaderConvert::DATM_0: case ShaderConvert::DATM_1: case ShaderConvert::DATM_0_RTA_CORRECTION: @@ -1143,6 +1144,15 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid; pdesc.depthAttachmentPixelFormat = ConvertPixelFormat(GSTexture::Format::DepthStencil); break; + case ShaderConvert::COPY_EMU_LQ: + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + break; + case ShaderConvert::COPY_POSTPROCESS: + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_postprocess_texture_format); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + break; + default: case ShaderConvert::COPY: case ShaderConvert::DOWNSAMPLE_COPY: case ShaderConvert::RGBA_TO_8I: // Yes really @@ -1153,7 +1163,7 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) case ShaderConvert::FLOAT32_TO_RGB8: case ShaderConvert::FLOAT16_TO_RGB5A1: case ShaderConvert::YUV: - pdesc.colorAttachments[0].pixelFormat 
= ConvertPixelFormat(GSTexture::Format::Color); + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_emulation_hw_rt_texture_format); pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; break; } @@ -1166,17 +1176,18 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) pdesc.colorAttachments[0].writeMask = mask; m_convert_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), name); } + + pdesc.colorAttachments[0].pixelFormat = layer_px_fmt; pdesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; for (size_t i = 0; i < std::size(m_present_pipeline); i++) { PresentShader conv = static_cast(i); NSString* name = [NSString stringWithCString:shaderName(conv) encoding:NSUTF8StringEncoding]; - pdesc.colorAttachments[0].pixelFormat = layer_px_fmt; m_present_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), [NSString stringWithFormat:@"present_%s", shaderName(conv) + 3]); } - pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_emulation_hw_rt_texture_format); for (size_t i = 0; i < std::size(m_convert_pipeline_copy_mask); i++) { MTLColorWriteMask mask = MTLColorWriteMaskNone; @@ -1190,6 +1201,7 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, i & 16 ? 
ps_copy_rta_correct : ps_copy, name); } + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(m_postprocess_texture_format); pdesc.colorAttachments[0].blendingEnabled = YES; pdesc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd; pdesc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; @@ -1581,6 +1593,22 @@ void GSDeviceMTL::RenderCopy(GSTexture* sTex, id pipelin void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear) { @autoreleasepool { + if (shader == ShaderConvert::COPY && dTex && dTex->GetFormat() != m_emulation_hw_rt_texture_format) + { + if (dTex->GetFormat() == GSTexture::Format::Color) + { + shader = ShaderConvert::COPY_EMU_LQ; + } + else if (dTex->GetFormat() == m_postprocess_texture_format) + { + shader = ShaderConvert::COPY_POSTPROCESS; + } + else + { + pxAssertMsg(false, "Trying to use the ShaderConvert::COPY shader pipeline to target an unsupported RT format"); + } + } + pxAssert(linear ? 
SupportsBilinear(shader) : SupportsNearest(shader)); id pipeline = m_convert_pipeline[static_cast(shader)]; @@ -1613,7 +1641,7 @@ static_assert(offsetof(DisplayConstantBuffer, TargetResolution) == offsetof(G static_assert(offsetof(DisplayConstantBuffer, RcpTargetResolution) == offsetof(GSMTLPresentPSUniform, rcp_target_resolution)); static_assert(offsetof(DisplayConstantBuffer, SourceResolution) == offsetof(GSMTLPresentPSUniform, source_resolution)); static_assert(offsetof(DisplayConstantBuffer, RcpSourceResolution) == offsetof(GSMTLPresentPSUniform, rcp_source_resolution)); -static_assert(offsetof(DisplayConstantBuffer, TimeAndPad.x) == offsetof(GSMTLPresentPSUniform, time)); +static_assert(offsetof(DisplayConstantBuffer, TimeAndBrightnessAndPad) == offsetof(GSMTLPresentPSUniform, time_and_brightness)); void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) { @autoreleasepool { @@ -1622,6 +1650,7 @@ void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture cb.SetSource(sRect, sTex->GetSize()); cb.SetTarget(dRect, ds); cb.SetTime(shaderTime); + cb.SetBrightness(EmuConfig.HDROutput ? 
(GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f); id pipe = m_present_pipeline[static_cast(shader)]; if (dTex) @@ -1876,6 +1905,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD); setFnConstantB(m_fn_constants, pssel.region_rect, GSMTLConstantIndex_PS_REGION_RECT); setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK); + setFnConstantB(m_fn_constants, EmuConfig.HDRRendering, GSMTLConstantIndex_PS_HDR); auto newps = LoadShader(@"ps_main"); ps = newps; m_hw_ps.insert(std::make_pair(pssel, std::move(newps))); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index 1773fdc65a..7b2a232f4e 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -37,7 +37,7 @@ struct GSMTLPresentPSUniform vector_float2 rcp_target_resolution; ///< 1 / target_resolution vector_float2 source_resolution; vector_float2 rcp_source_resolution; ///< 1 / source_resolution - float time; + vector_float2 time_and_brightness; // time, user brightness scale (HDR) }; struct GSMTLInterlacePSUniform @@ -214,4 +214,5 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_MANUAL_LOD, GSMTLConstantIndex_PS_REGION_RECT, GSMTLConstantIndex_PS_SCANMSK, + GSMTLConstantIndex_PS_HDR, }; diff --git a/pcsx2/GS/Renderers/Metal/present.metal b/pcsx2/GS/Renderers/Metal/present.metal index 30e86d271c..02919d179e 100644 --- a/pcsx2/GS/Renderers/Metal/present.metal +++ b/pcsx2/GS/Renderers/Metal/present.metal @@ -81,7 +81,7 @@ struct LottesCRTPass float ToLinear1(float c) { - return c <= 0.04045 ? 
c / 12.92 : pow((c + 0.055) / 1.055, 2.4); + return pow(abs(c), 2.2) * sign(c); } float3 ToLinear(float3 c) @@ -89,14 +89,14 @@ struct LottesCRTPass return float3(ToLinear1(c.r), ToLinear1(c.g), ToLinear1(c.b)); } - float ToSrgb1(float c) + float ToGamma1(float c) { - return c < 0.0031308 ? c * 12.92 : 1.055 * pow(c, 0.41666) - 0.055; + return pow(abs(c), 1.0 / 2.2) * sign(c); } - float3 ToSrgb(float3 c) + float3 ToGamma(float3 c) { - return float3(ToSrgb1(c.r), ToSrgb1(c.g), ToSrgb1(c.b)); + return float3(ToGamma1(c.r), ToGamma1(c.g), ToGamma1(c.b)); } float3 Fetch(float2 pos, float2 off) @@ -349,7 +349,7 @@ struct LottesCRTPass #if UseShadowMask color.rgb *= Mask(fragcoord.xy); #endif - color.rgb = ToSrgb(color.rgb); + color.rgb = ToGamma(color.rgb); return color; } diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 3e364e3558..aa854e8478 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -69,6 +69,7 @@ constant bool PS_AUTOMATIC_LOD [[function_constant(GSMTLConstantIndex_PS_AU constant bool PS_MANUAL_LOD [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]]; constant bool PS_REGION_RECT [[function_constant(GSMTLConstantIndex_PS_REGION_RECT)]]; constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]]; +constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]]; constant GSMTLExpandType VS_EXPAND_TYPE = static_cast(VS_EXPAND_TYPE_RAW); diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 1775f9caa6..01eecd7a95 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -419,7 +419,7 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) m_present[i].RegisterUniform("u_rcp_target_resolution"); m_present[i].RegisterUniform("u_source_resolution"); m_present[i].RegisterUniform("u_rcp_source_resolution"); - 
m_present[i].RegisterUniform("u_time"); + m_present[i].RegisterUniform("u_time_and_brightness"); } } @@ -472,7 +472,7 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle) // **************************************************************** // Post processing // **************************************************************** - if (!CompileShadeBoostProgram() || !CompileFXAAProgram()) + if (!CompileColorCorrectProgram() || !CompileFXAAProgram()) return false; // Image load store and GLSL 420pack is core in GL4.2, no need to check. @@ -825,7 +825,7 @@ void GSDeviceOGL::DestroyResources() m_cas.upscale_ps.Destroy(); m_cas.sharpen_ps.Destroy(); - m_shadeboost.ps.Destroy(); + m_colorcorrect.ps.Destroy(); for (GLProgram& prog : m_date.primid_ps) prog.Destroy(); @@ -1539,6 +1539,7 @@ void GSDeviceOGL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture cb.SetSource(sRect, sTex->GetSize()); cb.SetTarget(dRect, ds); cb.SetTime(shaderTime); + cb.SetBrightness(EmuConfig.HDROutput ? 
(GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f); GLProgram& prog = m_present[static_cast(shader)]; prog.Bind(); @@ -1550,7 +1551,7 @@ void GSDeviceOGL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture prog.Uniform2fv(5, &cb.RcpTargetResolution.x); prog.Uniform2fv(6, &cb.SourceResolution.x); prog.Uniform2fv(7, &cb.RcpSourceResolution.x); - prog.Uniform1f(8, cb.TimeAndPad.x); + prog.Uniform2fv(8, &cb.TimeAndBrightnessAndPad.x); OMSetDepthStencilState(m_convert.dss); OMSetBlendState(false); @@ -1867,29 +1868,31 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, true); } -bool GSDeviceOGL::CompileShadeBoostProgram() +bool GSDeviceOGL::CompileColorCorrectProgram() { - const std::optional shader = ReadShaderSource("shaders/opengl/shadeboost.glsl"); + const std::optional shader = ReadShaderSource("shaders/opengl/colorcorrect.glsl"); if (!shader.has_value()) { - Host::ReportErrorAsync("GS", "Failed to read shaders/opengl/shadeboost.glsl."); + Host::ReportErrorAsync("GS", "Failed to read shaders/opengl/colorcorrect.glsl."); return false; } const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, *shader)); - if (!m_shader_cache.GetProgram(&m_shadeboost.ps, m_convert.vs, ps)) + if (!m_shader_cache.GetProgram(&m_colorcorrect.ps, m_convert.vs, ps)) return false; - m_shadeboost.ps.RegisterUniform("params"); - m_shadeboost.ps.SetName("Shadeboost pipe"); + m_colorcorrect.ps.RegisterUniform("correction"); + m_colorcorrect.ps.RegisterUniform("adjustment"); + m_colorcorrect.ps.SetName("ColorCorrect pipe"); return true; } -void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) +void GSDeviceOGL::DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) { - GL_PUSH("DoShadeBoost"); + GL_PUSH("DoColorCorrect"); - m_shadeboost.ps.Bind(); - m_shadeboost.ps.Uniform4fv(0, params); + 
m_colorcorrect.ps.Bind(); + m_colorcorrect.ps.Uniform4fv(0, &cb.correction.x); + m_colorcorrect.ps.Uniform4fv(1, &cb.adjustment.x); OMSetColorMaskState(); @@ -1898,7 +1901,7 @@ void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float par const GSVector4 sRect(0, 0, 1, 1); const GSVector4 dRect(0, 0, s.x, s.y); - StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, false); + StretchRect(sTex, sRect, dTex, dRect, m_colorcorrect.ps, false); } void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, SetDATM datm) @@ -2490,7 +2493,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config) if (config.require_one_barrier && !m_features.texture_barrier) { // Requires a copy of the RT - draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true); + draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, config.rt->GetFormat(), true); GL_PUSH("Copy RT to temp texture for fbmask {%d,%d %dx%d}", config.drawarea.left, config.drawarea.top, config.drawarea.width(), config.drawarea.height()); diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index ebb36bd9ba..5b7e4a0602 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -196,7 +196,7 @@ private: struct { GLProgram ps; - } m_shadeboost; + } m_colorcorrect; struct { @@ -249,8 +249,8 @@ private: bool CompileFXAAProgram(); void DoFXAA(GSTexture* sTex, GSTexture* dTex) override; - bool CompileShadeBoostProgram(); - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) override; + bool CompileColorCorrectProgram(); + void DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) override; bool CreateCASPrograms(); bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) override; diff --git a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp 
b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp index 9f0e9b00ee..0b65332b04 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSTextureOGL.cpp @@ -64,6 +64,7 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format break; // 4 channel normalized + // There's no proper support for HDR in OpenGL, so we force 8bpc case Format::Color: case Format::ColorHQ: case Format::ColorHDR: diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index 4114b504d3..749dd3ced7 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -113,7 +113,8 @@ GSTexture* GSRendererSW::GetOutput(int i, float& scale, int& y_offset) const int w = curFramebuffer.FBW * 64; const int h = framebufferSize.y; - if (g_gs_device->ResizeRenderTarget(&m_texture[index], w, h, false, false)) + // This texture needs to be RGBA8 as CPU writes assume so (no need for it to follow "m_emulation_hw_rt_texture_format") + if (g_gs_device->ResizeRenderTarget(&m_texture[index], w, h, false, false, GSTexture::Format::Color)) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[curFramebuffer.PSM]; constexpr int pitch = 1024 * 4; diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 762f65ed5e..fab14ad507 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -2714,7 +2714,7 @@ void GSDeviceVK::DrawIndexedPrimitive(int offset, int count) vkCmdDrawIndexed(GetCurrentCommandBuffer(), count, 1, m_index.start + offset, m_vertex.start, 0); } -VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const +VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format, GSTexture::Type type) const { static constexpr std::array(GSTexture::Format::Last) + 1> s_format_mapping = {{ VK_FORMAT_UNDEFINED, // Invalid @@ -2733,9 +2733,18 @@ VkFormat 
GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const VK_FORMAT_BC7_UNORM_BLOCK, // BC7 }}; - return (format != GSTexture::Format::DepthStencil || m_features.stencil_buffer) ? - s_format_mapping[static_cast(format)] : - VK_FORMAT_D32_SFLOAT; + if (format != GSTexture::Format::DepthStencil || m_features.stencil_buffer) + { +#if OLD_HDR + if (EmuConfig.HDRRendering && format == GSTexture::Format::Color && (type == GSTexture::Type::RenderTarget || type == GSTexture::Type::RWTexture)) + { + return VK_FORMAT_R16G16B16A16_SFLOAT; + } +#endif + return s_format_mapping[static_cast(format)]; + } + + return VK_FORMAT_D32_SFLOAT; } GSTexture* GSDeviceVK::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) @@ -2761,6 +2770,8 @@ void GSDeviceVK::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, { g_perfmon.Put(GSPerfMon::TextureCopies, 1); + pxAssert(sTex->GetFormat() == dTex->GetFormat()); + GSTextureVK* const sTexVK = static_cast(sTex); GSTextureVK* const dTexVK = static_cast(dTex); const GSVector4i dtex_rc(0, 0, dTexVK->GetWidth(), dTexVK->GetHeight()); @@ -2852,14 +2863,37 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* ShaderConvert shader /* = ShaderConvert::COPY */, bool linear /* = true */) { pxAssert(HasDepthOutput(shader) == (dTex && dTex->GetType() == GSTexture::Type::DepthStencil)); - pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); GL_INS("StretchRect(%d) {%d,%d} %dx%d -> {%d,%d) %dx%d", shader, int(sRect.left), int(sRect.top), int(sRect.right - sRect.left), int(sRect.bottom - sRect.top), int(dRect.left), int(dRect.top), int(dRect.right - dRect.left), int(dRect.bottom - dRect.top)); + if (shader == ShaderConvert::COPY && dTex && dTex->GetFormat() != m_emulation_hw_rt_texture_format) + { + pxAssert(false); //TODO: delete if this never happens??? 
+ + if (dTex->GetFormat() == GSTexture::Format::Color) + { + shader = ShaderConvert::COPY_EMU_LQ; + } + else if (dTex->GetFormat() == m_postprocess_texture_format) + { + shader = ShaderConvert::COPY_POSTPROCESS; + } + else + { + pxAssertMsg(false, "Trying to use the ShaderConvert::COPY shader pipeline to target an unsupported RT format"); + } + } + else + { + pxAssertMsg(dTex, "StretchRect(): The destination texture needs to valid (it used to redirect to the presentation surface but it doesn't anymore)"); + } + + pxAssert(linear ? SupportsBilinear(shader) : SupportsNearest(shader)); + DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, - dTex ? m_convert[static_cast(shader)] : m_present[static_cast(shader)], linear, + m_convert[static_cast(shader)], linear, ShaderConvertWriteMask(shader) == 0xf); } @@ -2868,6 +2902,8 @@ void GSDeviceVK::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* { GL_PUSH("ColorCopy Red:%d Green:%d Blue:%d Alpha:%d", red, green, blue, alpha); + pxAssertMsg((shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION) && dTex && dTex->GetFormat() == m_emulation_hw_rt_texture_format, "Trying to use the m_color_copy shader pipeline to target an unsupported RT format"); + const u32 index = (red ? 1 : 0) | (green ? 2 : 0) | (blue ? 4 : 0) | (alpha ? 8 : 0); const bool allow_discard = (index == 0xf); int rta_offset = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; @@ -2882,6 +2918,7 @@ void GSDeviceVK::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* cb.SetSource(sRect, sTex->GetSize()); cb.SetTarget(dRect, dTex ? dTex->GetSize() : GSVector2i(GetWindowWidth(), GetWindowHeight())); cb.SetTime(shaderTime); + cb.SetBrightness(EmuConfig.HDROutput ? 
(GSConfig.HDR_BrightnessNits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f); SetUtilityPushConstants(&cb, sizeof(cb)); DoStretchRect(static_cast(sTex), sRect, static_cast(dTex), dRect, @@ -2995,6 +3032,7 @@ void GSDeviceVK::DoMultiStretchRects( SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler : m_point_sampler); pxAssert(shader == ShaderConvert::COPY || shader == ShaderConvert::RTA_CORRECTION || rects[0].wmask.wrgba == 0xf); + pxAssert(shader != ShaderConvert::COPY || dTex->GetFormat() == m_emulation_hw_rt_texture_format); int rta_bit = (shader == ShaderConvert::RTA_CORRECTION) ? 16 : 0; SetPipeline( (rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba | rta_bit] : m_convert[static_cast(shader)]); @@ -3021,7 +3059,7 @@ void GSDeviceVK::BeginRenderPassForStretchRect( m_utility_depth_render_pass_load, dtex_rc); } - else if (dTex->GetFormat() == GSTexture::Format::Color) + else if (dTex->GetFormat() == m_emulation_hw_rt_texture_format) { if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) BeginClearRenderPass(m_utility_color_render_pass_clear, dtex_rc, dTex->GetClearColor()); @@ -3030,8 +3068,20 @@ void GSDeviceVK::BeginRenderPassForStretchRect( m_utility_color_render_pass_load, dtex_rc); } + else if (dTex->GetFormat() == m_postprocess_texture_format) + { + if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + BeginClearRenderPass(m_postprocess_render_pass_clear, dtex_rc, dTex->GetClearColor()); + else + BeginRenderPass((load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) ? m_postprocess_render_pass_discard : + m_postprocess_render_pass_load, + dtex_rc); + } else { + // This might be innocuous, but either way it should have already been handled above with "m_utility_color_render_pass_*" or "m_postprocess_render_pass_*". 
+ pxAssert(dTex->GetFormat() != GSTexture::Format::Color && dTex->GetFormat() != GSTexture::Format::ColorHQ && dTex->GetFormat() != GSTexture::Format::ColorHDR); + // integer formats, etc const VkRenderPass rp = GetRenderPass(dTex->GetVkFormat(), VK_FORMAT_UNDEFINED, load_op, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE); @@ -3187,6 +3237,7 @@ void GSDeviceVK::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 clamp_min, static_cast(downsample_factor), 0, static_cast(downsample_factor * downsample_factor)}; SetUtilityPushConstants(&uniforms, sizeof(uniforms)); + pxAssert(dTex->GetFormat() == m_emulation_hw_rt_texture_format); // "ShaderConvert::DOWNSAMPLE_COPY" expects RTs of this format const ShaderConvert shader = ShaderConvert::DOWNSAMPLE_COPY; //const GSVector4 dRect = GSVector4(dTex->GetRect()); DoStretchRect(static_cast(sTex), GSVector4::zero(), static_cast(dTex), dRect, @@ -3229,6 +3280,8 @@ void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, } static_cast(dTex)->TransitionToLayout(GSTextureVK::Layout::ColorAttachment); + pxAssert(dTex->GetFormat() == m_postprocess_texture_format); // "ShaderConvert::COPY_POSTPROCESS" and "m_postprocess_render_pass_*" expect RTs of this format + const GSVector2i dsize(dTex->GetSize()); const GSVector4i darea(0, 0, dsize.x, dsize.y); bool dcleared = false; @@ -3241,8 +3294,8 @@ void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, static_cast(sTex[1])->TransitionToLayout(GSTextureVK::Layout::ShaderReadOnly); OMSetRenderTargets(dTex, nullptr, darea); SetUtilityTexture(sTex[1], sampler); - BeginClearRenderPass(m_utility_color_render_pass_clear, darea, c); - SetPipeline(m_convert[static_cast(ShaderConvert::COPY)]); + BeginClearRenderPass(m_postprocess_render_pass_clear, darea, c); + SetPipeline(m_convert[static_cast(ShaderConvert::COPY_POSTPROCESS)]); DrawStretchRect(sRect[1], PMODE.SLBG ? 
dRect[2] : dRect[1], dsize); dTex->SetState(GSTexture::State::Dirty); dcleared = true; @@ -3280,13 +3333,13 @@ void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, { EndRenderPass(); OMSetRenderTargets(dTex, nullptr, darea); - BeginClearRenderPass(m_utility_color_render_pass_clear, darea, c); + BeginClearRenderPass(m_postprocess_render_pass_clear, darea, c); dTex->SetState(GSTexture::State::Dirty); } else if (!InRenderPass()) { OMSetRenderTargets(dTex, nullptr, darea); - BeginRenderPass(m_utility_color_render_pass_load, darea); + BeginRenderPass(m_postprocess_render_pass_load, darea); } if (sTex[0] && sTex[0]->GetState() == GSTexture::State::Dirty) @@ -3300,6 +3353,7 @@ void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, if (feedback_write_1) { + pxAssert(sTex[2]->GetFormat() == m_emulation_hw_rt_texture_format); // "ShaderConvert::YUV" and "m_utility_color_render_pass_load" expect RTs of this format EndRenderPass(); SetPipeline(m_convert[static_cast(ShaderConvert::YUV)]); SetUtilityTexture(dTex, sampler); @@ -3336,17 +3390,17 @@ void GSDeviceVK::DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* static_cast(dTex)->TransitionToLayout(GSTextureVK::Layout::ShaderReadOnly); } -void GSDeviceVK::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) +void GSDeviceVK::DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) { const GSVector4 sRect = GSVector4(0.0f, 0.0f, 1.0f, 1.0f); const GSVector4i dRect = dTex->GetRect(); EndRenderPass(); OMSetRenderTargets(dTex, nullptr, dRect); SetUtilityTexture(sTex, m_point_sampler); - BeginRenderPass(m_utility_color_render_pass_discard, dRect); + BeginRenderPass(m_postprocess_render_pass_discard, dRect); dTex->SetState(GSTexture::State::Dirty); - SetPipeline(m_shadeboost_pipeline); - SetUtilityPushConstants(params, sizeof(float) * 4); + SetPipeline(m_colorcorrect_pipeline); + SetUtilityPushConstants(&cb, sizeof(cb)); 
DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize()); EndRenderPass(); @@ -3360,7 +3414,7 @@ void GSDeviceVK::DoFXAA(GSTexture* sTex, GSTexture* dTex) EndRenderPass(); OMSetRenderTargets(dTex, nullptr, dRect); SetUtilityTexture(sTex, m_linear_sampler); - BeginRenderPass(m_utility_color_render_pass_discard, dRect); + BeginRenderPass(m_postprocess_render_pass_discard, dRect); dTex->SetState(GSTexture::State::Dirty); SetPipeline(m_fxaa_pipeline); DrawStretchRect(sRect, GSVector4(dRect), dTex->GetSize()); @@ -3822,9 +3876,9 @@ bool GSDeviceVK::CreateRenderPasses() return false; \ } while (0) - const VkFormat rt_format = LookupNativeFormat(GSTexture::Format::Color); - const VkFormat colclip_rt_format = LookupNativeFormat(GSTexture::Format::ColorClip); - const VkFormat depth_format = LookupNativeFormat(GSTexture::Format::DepthStencil); + const VkFormat rt_format = LookupNativeFormat(m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget); + const VkFormat colclip_rt_format = LookupNativeFormat(GSTexture::Format::ColorClip, GSTexture::Type::RenderTarget); + const VkFormat depth_format = LookupNativeFormat(GSTexture::Format::DepthStencil, GSTexture::Type::RenderTarget); for (u32 rt = 0; rt < 2; rt++) { @@ -3874,6 +3928,14 @@ bool GSDeviceVK::CreateRenderPasses() GET(m_utility_depth_render_pass_discard, VK_FORMAT_UNDEFINED, depth_format, false, false, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + const VkFormat pp_rt_format = LookupNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget); + GET(m_postprocess_render_pass_load, pp_rt_format, VK_FORMAT_UNDEFINED, false, false, VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + GET(m_postprocess_render_pass_clear, pp_rt_format, VK_FORMAT_UNDEFINED, false, false, VK_ATTACHMENT_LOAD_OP_CLEAR, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + 
GET(m_postprocess_render_pass_discard, pp_rt_format, VK_FORMAT_UNDEFINED, false, false, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE); + m_date_setup_render_pass = GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_STORE_OP_STORE, m_features.stencil_buffer ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_DONT_CARE, @@ -3900,6 +3962,13 @@ bool GSDeviceVK::CompileConvertPipelines() return false; ScopedGuard vs_guard([this, &vs]() { vkDestroyShaderModule(m_device, vs, nullptr); }); + std::string psource; + if (EmuConfig.HDRRendering) + { + psource += "#define PS_HDR 1\n"; + } + psource += *shader; + Vulkan::GraphicsPipelineBuilder gpb; SetPipelineProvokingVertex(m_features, gpb); AddUtilityVertexAttributes(gpb); @@ -3941,9 +4010,23 @@ bool GSDeviceVK::CompileConvertPipelines() rp = m_date_setup_render_pass; } break; + case ShaderConvert::COPY_EMU_LQ: + { + rp = GetRenderPass(LookupNativeFormat(GSTexture::Format::Color, GSTexture::Type::RenderTarget), + LookupNativeFormat(depth ? GSTexture::Format::DepthStencil : GSTexture::Format::Invalid), + VK_ATTACHMENT_LOAD_OP_DONT_CARE); + } + break; + case ShaderConvert::COPY_POSTPROCESS: + { + rp = GetRenderPass(LookupNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget), + LookupNativeFormat(depth ? GSTexture::Format::DepthStencil : GSTexture::Format::Invalid), + VK_ATTACHMENT_LOAD_OP_DONT_CARE); + } + break; default: { - rp = GetRenderPass(LookupNativeFormat(depth ? GSTexture::Format::Invalid : GSTexture::Format::Color), + rp = GetRenderPass(LookupNativeFormat(depth ? GSTexture::Format::Invalid : m_emulation_hw_rt_texture_format, GSTexture::Type::RenderTarget), LookupNativeFormat(depth ? 
GSTexture::Format::DepthStencil : GSTexture::Format::Invalid), VK_ATTACHMENT_LOAD_OP_DONT_CARE); } @@ -3969,7 +4052,7 @@ bool GSDeviceVK::CompileConvertPipelines() gpb.SetColorWriteMask(0, ShaderConvertWriteMask(i)); - VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i)); + VkShaderModule ps = GetUtilityFragmentShader(psource, shaderName(i)); if (ps == VK_NULL_HANDLE) return false; @@ -4060,7 +4143,7 @@ bool GSDeviceVK::CompileConvertPipelines() { const std::string entry_point(StringUtil::StdStringFromFormat("ps_stencil_image_init_%d", datm)); VkShaderModule ps = - GetUtilityFragmentShader(*shader, entry_point.c_str()); + GetUtilityFragmentShader(psource, entry_point.c_str()); if (ps == VK_NULL_HANDLE) return false; @@ -4104,7 +4187,14 @@ bool GSDeviceVK::CompilePresentPipelines() return false; } - VkShaderModule vs = GetUtilityVertexShader(*shader); + std::string source; + if (EmuConfig.HDROutput) + { + source += "#define PS_HDR 1\n"; + } + source += *shader; + + VkShaderModule vs = GetUtilityVertexShader(source); if (vs == VK_NULL_HANDLE) return false; ScopedGuard vs_guard([this, &vs]() { vkDestroyShaderModule(m_device, vs, nullptr); }); @@ -4128,7 +4218,7 @@ bool GSDeviceVK::CompilePresentPipelines() { const int index = static_cast(i); - VkShaderModule ps = GetUtilityFragmentShader(*shader, shaderName(i)); + VkShaderModule ps = GetUtilityFragmentShader(source, shaderName(i)); if (ps == VK_NULL_HANDLE) return false; @@ -4156,7 +4246,7 @@ bool GSDeviceVK::CompileInterlacePipelines() } VkRenderPass rp = - GetRenderPass(LookupNativeFormat(GSTexture::Format::Color), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); + GetRenderPass(LookupNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); if (!rp) return false; @@ -4207,7 +4297,7 @@ bool GSDeviceVK::CompileMergePipelines() } VkRenderPass rp = - GetRenderPass(LookupNativeFormat(GSTexture::Format::Color), VK_FORMAT_UNDEFINED, 
VK_ATTACHMENT_LOAD_OP_LOAD); + GetRenderPass(LookupNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); if (!rp) return false; @@ -4252,7 +4342,7 @@ bool GSDeviceVK::CompileMergePipelines() bool GSDeviceVK::CompilePostProcessingPipelines() { VkRenderPass rp = - GetRenderPass(LookupNativeFormat(GSTexture::Format::Color), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); + GetRenderPass(LookupNativeFormat(m_postprocess_texture_format, GSTexture::Type::RenderTarget), VK_FORMAT_UNDEFINED, VK_ATTACHMENT_LOAD_OP_LOAD); if (!rp) return false; @@ -4283,7 +4373,12 @@ bool GSDeviceVK::CompilePostProcessingPipelines() return false; } - const std::string psource = "#define FXAA_GLSL_VK 1\n" + *pshader; + std::string psource = "#define FXAA_GLSL_VK 1\n"; + if (EmuConfig.HDROutput) + { + psource += "#define PS_HDR 1\n"; + } + psource += *pshader; VkShaderModule vs = GetUtilityVertexShader(*vshader); if (vs == VK_NULL_HANDLE) @@ -4304,19 +4399,30 @@ bool GSDeviceVK::CompilePostProcessingPipelines() } { - const std::optional shader = ReadShaderSource("shaders/vulkan/shadeboost.glsl"); + const std::optional shader = ReadShaderSource("shaders/vulkan/colorcorrect.glsl"); if (!shader) { - Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/shadeboost.glsl."); + Host::ReportErrorAsync("GS", "Failed to read shaders/vulkan/colorcorrect.glsl."); return false; } - VkShaderModule vs = GetUtilityVertexShader(*shader); + std::string source; + if (EmuConfig.HDRRendering) + { + source += "#define PS_HDR_INPUT 1\n"; + } + if (EmuConfig.HDROutput) + { + source += "#define PS_HDR_OUTPUT 1\n"; + } + source += *shader; + + VkShaderModule vs = GetUtilityVertexShader(source); ScopedGuard vs_guard([this, &vs]() { vkDestroyShaderModule(m_device, vs, nullptr); }); if (vs == VK_NULL_HANDLE) return false; - VkShaderModule ps = GetUtilityFragmentShader(*shader); + VkShaderModule ps = GetUtilityFragmentShader(source); ScopedGuard 
ps_guard([this, &ps]() { vkDestroyShaderModule(m_device, ps, nullptr); }); if (ps == VK_NULL_HANDLE) return false; @@ -4324,11 +4430,11 @@ bool GSDeviceVK::CompilePostProcessingPipelines() gpb.SetVertexShader(vs); gpb.SetFragmentShader(ps); - m_shadeboost_pipeline = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); - if (!m_shadeboost_pipeline) + m_colorcorrect_pipeline = gpb.Create(m_device, g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_colorcorrect_pipeline) return false; - Vulkan::SetObjectName(m_device, m_shadeboost_pipeline, "Shadeboost pipeline"); + Vulkan::SetObjectName(m_device, m_colorcorrect_pipeline, "ColorCorrect pipeline"); } return true; @@ -4387,7 +4493,14 @@ bool GSDeviceVK::CompileImGuiPipeline() return false; } - VkShaderModule vs = GetUtilityVertexShader(glsl.value(), "vs_main"); + std::string source; + if (EmuConfig.HDROutput) + { + source += "#define PS_HDR 1\n"; + } + source += *glsl; + + VkShaderModule vs = GetUtilityVertexShader(source, "vs_main"); if (vs == VK_NULL_HANDLE) { Console.Error("VK: Failed to compile ImGui vertex shader"); @@ -4395,7 +4508,7 @@ bool GSDeviceVK::CompileImGuiPipeline() } ScopedGuard vs_guard([this, &vs]() { vkDestroyShaderModule(m_device, vs, nullptr); }); - VkShaderModule ps = GetUtilityFragmentShader(glsl.value(), "ps_main"); + VkShaderModule ps = GetUtilityFragmentShader(source, "ps_main"); if (ps == VK_NULL_HANDLE) { Console.Error("VK: Failed to compile ImGui pixel shader"); @@ -4440,13 +4553,26 @@ void GSDeviceVK::RenderImGui() if (draw_data->CmdListsCount == 0) return; - const float uniforms[2][2] = {{ + // Imgui currently follows the same brightness as the whole HDR image (applied earlier on presentation), + // we could expose this variable to users to make it brightness + float imgui_hdr_brightness_nits = GSConfig.HDR_BrightnessNits; + + const float uniforms[4][2] = {{ 2.0f / static_cast(m_window_info.surface_width), 2.0f / 
static_cast(m_window_info.surface_height), }, { -1.0f, -1.0f, + }, + { + EmuConfig.HDROutput ? (imgui_hdr_brightness_nits / Pcsx2Config::GSOptions::DEFAULT_SRGB_BRIGHTNESS_NITS) : 1.f, + 0.0f, + }, + // Padding + { + 0.0f, + 0.0f, }}; SetUtilityPushConstants(uniforms, sizeof(uniforms)); @@ -4621,8 +4747,8 @@ void GSDeviceVK::DestroyResources() } if (m_fxaa_pipeline != VK_NULL_HANDLE) vkDestroyPipeline(m_device, m_fxaa_pipeline, nullptr); - if (m_shadeboost_pipeline != VK_NULL_HANDLE) - vkDestroyPipeline(m_device, m_shadeboost_pipeline, nullptr); + if (m_colorcorrect_pipeline != VK_NULL_HANDLE) + vkDestroyPipeline(m_device, m_colorcorrect_pipeline, nullptr); for (VkPipeline it : m_cas_pipelines) { @@ -4788,6 +4914,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb); AddMacro(ss, "PS_NO_COLOR", sel.no_color); AddMacro(ss, "PS_NO_COLOR1", sel.no_color1); + AddMacro(ss, "PS_HDR", EmuConfig.HDRRendering); ss << m_tfx_source; VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str()); @@ -5721,7 +5848,8 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) if (config.require_one_barrier && !m_features.texture_barrier) { // requires a copy of the RT - draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, colclip_rt ? GSTexture::Format::ColorClip : GSTexture::Format::Color, true)); + pxAssert(draw_rt->GetFormat() == (colclip_rt ? 
GSTexture::Format::ColorClip : m_emulation_hw_rt_texture_format)); + draw_rt_clone = static_cast(CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true)); if (draw_rt_clone) { EndRenderPass(); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 975b45f167..4220ca173f 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -345,6 +345,7 @@ public: { NUM_TFX_DYNAMIC_OFFSETS = 2, NUM_UTILITY_SAMPLERS = 1, + // This needs to match the sum of all the utility cbuffer sizes CONVERT_PUSH_CONSTANTS_SIZE = 96, NUM_CAS_PIPELINES = 2, @@ -399,7 +400,7 @@ private: VkRenderPass m_date_image_setup_render_passes[2][2] = {}; // [depth][clear] VkPipeline m_date_image_setup_pipelines[2][4] = {}; // [depth][datm] VkPipeline m_fxaa_pipeline = {}; - VkPipeline m_shadeboost_pipeline = {}; + VkPipeline m_colorcorrect_pipeline = {}; std::unordered_map m_tfx_vertex_shaders; std::unordered_map @@ -414,6 +415,9 @@ private: VkRenderPass m_utility_depth_render_pass_discard = VK_NULL_HANDLE; VkRenderPass m_date_setup_render_pass = VK_NULL_HANDLE; VkRenderPass m_swap_chain_render_pass = VK_NULL_HANDLE; + VkRenderPass m_postprocess_render_pass_load = VK_NULL_HANDLE; + VkRenderPass m_postprocess_render_pass_clear = VK_NULL_HANDLE; + VkRenderPass m_postprocess_render_pass_discard = VK_NULL_HANDLE; VkRenderPass m_tfx_render_pass[2][2][2][3][2][2][3][3] = {}; // [rt][ds][colclip][date][fbl][dsp][rt_op][ds_op] @@ -434,7 +438,7 @@ private: const GSRegEXTBUF& EXTBUF, u32 c, const bool linear) final; void DoInterlace(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderInterlace shader, bool linear, const InterlaceConstantBuffer& cb) final; - void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; + void DoColorCorrect(GSTexture* sTex, GSTexture* dTex, const ColorCorrectConstantBuffer& cb) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) 
final; bool DoCAS( @@ -580,7 +584,7 @@ public: ////////////////////////////////////////////////////////////////////////// public: - VkFormat LookupNativeFormat(GSTexture::Format format) const; + VkFormat LookupNativeFormat(GSTexture::Format format, GSTexture::Type type = GSTexture::Type::Invalid) const; __fi VkFramebuffer GetCurrentFramebuffer() const { return m_current_framebuffer; } diff --git a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp index 1ce0bc3c69..4027f7bfc4 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp @@ -65,7 +65,7 @@ GSTextureVK::~GSTextureVK() std::unique_ptr GSTextureVK::Create(Type type, Format format, int width, int height, int levels) { - const VkFormat vk_format = GSDeviceVK::GetInstance()->LookupNativeFormat(format); + const VkFormat vk_format = GSDeviceVK::GetInstance()->LookupNativeFormat(format, type); VkImageCreateInfo ici = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, nullptr, 0, VK_IMAGE_TYPE_2D, vk_format, {static_cast(width), static_cast(height), 1}, static_cast(levels), 1, VK_SAMPLE_COUNT_1_BIT, diff --git a/pcsx2/GS/Renderers/Vulkan/VKSwapChain.cpp b/pcsx2/GS/Renderers/Vulkan/VKSwapChain.cpp index 9b9295ed3a..138d81588c 100644 --- a/pcsx2/GS/Renderers/Vulkan/VKSwapChain.cpp +++ b/pcsx2/GS/Renderers/Vulkan/VKSwapChain.cpp @@ -185,16 +185,49 @@ std::optional VKSwapChain::SelectSurfaceFormat(VkSurfaceKHR GSDeviceVK::GetInstance()->GetPhysicalDevice(), surface, &format_count, surface_formats.data()); pxAssert(res == VK_SUCCESS); + constexpr VkFormat default_sdr_format = VK_FORMAT_R8G8B8A8_UNORM; + // If there is a single undefined surface format, the device doesn't care, so we'll just use RGBA - if (surface_formats[0].format == VK_FORMAT_UNDEFINED) - return VkSurfaceFormatKHR{VK_FORMAT_R8G8B8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; + if (surface_formats.empty() || surface_formats[0].format == VK_FORMAT_UNDEFINED) + return 
VkSurfaceFormatKHR{default_sdr_format, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; + + // HDR if enabled and supported + if (EmuConfig.HDROutput) + { + for (const VkSurfaceFormatKHR& surface_format : surface_formats) + { + if (surface_format.format == VK_FORMAT_R16G16B16A16_SFLOAT && surface_format.colorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) + { + return surface_format; + } + } + // Disable it as it's not supported + EmuConfig.HDROutput = false; + WARNING_LOG("Vulkan swapchain doesn't support HDR formats."); + } + + // Use RGB10A2 if available (higher quality than RGBA8) + for (const VkSurfaceFormatKHR& surface_format : surface_formats) + { + INFO_LOG("Vulkan swapchain supports format {} color space {}.", (u32)surface_format.format, (u32)surface_format.colorSpace); + if (surface_format.format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 && surface_format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) + { + return surface_format; + } + } // Try to find a suitable format. for (const VkSurfaceFormatKHR& surface_format : surface_formats) { + // We could easily support "VK_COLOR_SPACE_BT709_LINEAR_EXT" and "VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT" for SDR if we linearized on presentation, + // but at the moment we don't. + if (surface_format.colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) + { + continue; + } // Some drivers seem to return a SRGB format here (Intel Mesa). // This results in gamma correction when presenting to the screen, which we don't want. - // Use a linear format instead, if this is the case. + // Use a non sRGB format instead, if this is the case. 
return VkSurfaceFormatKHR{GetLinearFormat(surface_format.format), VK_COLOR_SPACE_SRGB_NONLINEAR_KHR}; } @@ -446,10 +479,21 @@ bool VKSwapChain::CreateSwapChain() m_images.reserve(image_count); m_current_image = 0; + // It's too early to call "GSDeviceVK::LookupNativeFormat()" + GSTexture::Format format = GSTexture::Format::Color; + switch (surface_format->format) + { + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + format = GSTexture::Format::ColorHQ; + break; + case VK_FORMAT_R16G16B16A16_SFLOAT: + format = GSTexture::Format::ColorHDR; + break; + } for (u32 i = 0; i < image_count; i++) { std::unique_ptr texture = - GSTextureVK::Adopt(images[i], GSTexture::Type::RenderTarget, GSTexture::Format::Color, + GSTextureVK::Adopt(images[i], GSTexture::Type::RenderTarget, format, m_window_info.surface_width, m_window_info.surface_height, 1, surface_format->format); if (!texture) return false; diff --git a/pcsx2/ImGui/FullscreenUI.cpp b/pcsx2/ImGui/FullscreenUI.cpp index 818a94c9b1..af843f7ea5 100644 --- a/pcsx2/ImGui/FullscreenUI.cpp +++ b/pcsx2/ImGui/FullscreenUI.cpp @@ -4076,6 +4076,8 @@ void FullscreenUI::DrawGraphicsSettingsPage(SettingsInterface* bsi, bool show_ad "accurate_blending_unit", static_cast(AccBlendLevel::Basic), s_blending_options, std::size(s_blending_options), true); DrawToggleSetting( bsi, FSUI_CSTR("Mipmapping"), FSUI_CSTR("Enables emulation of the GS's texture mipmapping."), "EmuCore/GS", "hw_mipmap", true); + DrawToggleSetting( + bsi, FSUI_CSTR("HDR"), FSUI_CSTR("Forces all rendering to be in HDR without integer rounding, and HDR output. It will likely break many games. It might not work on all rendering backends."), "EmuCore/GS", "hdr", false); } else { @@ -4298,7 +4300,28 @@ void FullscreenUI::DrawGraphicsSettingsPage(SettingsInterface* bsi, bool show_ad DrawIntRangeSetting(bsi, FSUI_CSTR("Shade Boost Contrast"), FSUI_CSTR("Adjusts contrast. 
50 is normal."), "EmuCore/GS", "ShadeBoost_Contrast", 50, 1, 100, "%d", shadeboost_active); DrawIntRangeSetting(bsi, FSUI_CSTR("Shade Boost Saturation"), FSUI_CSTR("Adjusts saturation. 50 is normal."), "EmuCore/GS", - "ShadeBoost_Saturation", 50, 1, 100, "%d", shadeboost_active); + "ShadeBoost_Saturation", 50, 0, 100, "%d", shadeboost_active); + + const bool colorcorrect_active = GetEffectiveBoolSetting(bsi, "EmuCore/GS", "ColorCorrect", false); + + DrawToggleSetting(bsi, FSUI_CSTR("Color Correct"), FSUI_CSTR("This will interpret the game as having this specific gamma, and convert it to your display gamma (meant to be 2.2).\n2.35 is the average CRT TV gamma."), "EmuCore/GS", "ColorCorrect", false); + DrawFloatSpinBoxSetting(bsi, FSUI_CSTR("Color Correct Game Gamma"), FSUI_CSTR("Adjusts brightness. 50 is normal."), "EmuCore/GS", + "ColorCorrect_GameGamma", Pcsx2Config::GSOptions::DEFAULT_GAME_GAMMA, 2.0f, 3.0f, 0.01f, 1.f, "%f", colorcorrect_active); + static constexpr const char* s_color_spaces[] = { + FSUI_NSTR("Rec.709/sRGB (Default)"), + FSUI_NSTR("NTSC-M"), + FSUI_NSTR("NTSC-J"), + FSUI_NSTR("PAL"), + }; + DrawIntListSetting(bsi, FSUI_CSTR("Color Correct Game Color Space"), FSUI_CSTR("This will interpret the game as being developed on (or for) a specific color space (each region had its own), and convert it to your display color space (Rec.709/sRGB).\nIt's not know what standard each game targeted, if any."), "EmuCore/GS", "ColorCorrect_GameColorSpace", 0, + s_color_spaces, std::size(s_color_spaces), colorcorrect_active); + + const bool hdr_active = GetEffectiveBoolSetting(bsi, "EmuCore/GS", "hdr", false); + + DrawIntRangeSetting(bsi, FSUI_CSTR("HDR Nits"), FSUI_CSTR("Adjusts the brightness of the HDR output (in nits). 
203 nits is standard."), "EmuCore/GS", + "HDR_BrightnessNits", Pcsx2Config::GSOptions::DEFAULT_HDR_BRIGHTNESS_NITS, 80, 500, "%d", hdr_active); + DrawIntRangeSetting(bsi, FSUI_CSTR("HDR Peak Nits"), FSUI_CSTR("Adjusts the peak brightness of the HDR output (in nits). It should match your display peak brightness."), "EmuCore/GS", + "HDR_PeakBrightnessNits", Pcsx2Config::GSOptions::DEFAULT_HDR_PEAK_BRIGHTNESS_NITS, 400, 10000, "%d", hdr_active); static constexpr const char* s_tv_shaders[] = { FSUI_NSTR("None (Default)"), @@ -7301,6 +7324,7 @@ void FullscreenUI::DrawAchievementsSettingsPage(std::unique_lock& se // To avoid having to type T_RANSLATE("FullscreenUI", ...) everywhere, we use the shorter macros at the top // of the file, then preprocess and generate a bunch of noops here to define the strings. Sadly that means // the view in Linguist is gonna suck, but you can search the file for the string for more context. +// NOTE: if everything goes correctly, these are automatically generated and don't need to be added manually. 
///////////////////////////////////////////////////////////////////////////////////////////////////////////// #if 0 diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 2e5cb62c7d..150a08cba6 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -736,6 +736,8 @@ Pcsx2Config::GSOptions::GSOptions() PreloadFrameWithGSData = false; Mipmap = true; HWMipmap = true; + HDRRendering = false; + HDROutput = false; ManualUserHacks = false; UserHacks_AlignSpriteX = false; @@ -838,6 +840,10 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(ShadeBoost_Brightness) && OpEqu(ShadeBoost_Contrast) && OpEqu(ShadeBoost_Saturation) && + OpEqu(ColorCorrect_GameGamma) && + OpEqu(ColorCorrect_GameColorSpace) && + OpEqu(HDR_BrightnessNits) && + OpEqu(HDR_PeakBrightnessNits) && OpEqu(PNGCompressionLevel) && OpEqu(SaveDrawStart) && OpEqu(SaveDrawCount) && @@ -878,6 +884,8 @@ bool Pcsx2Config::GSOptions::RestartOptionsAreEqual(const GSOptions& right) cons return OpEqu(Renderer) && OpEqu(Adapter) && OpEqu(UseDebugDevice) && + OpEqu(HDRRendering) && + OpEqu(HDROutput) && OpEqu(UseBlitSwapChain) && OpEqu(DisableShaderCache) && OpEqu(DisableFramebufferFetch) && @@ -949,6 +957,8 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBoolEx(AutoFlushSW, "autoflush_sw"); SettingsWrapBitBoolEx(PreloadFrameWithGSData, "preload_frame_with_gs_data"); SettingsWrapBitBoolEx(Mipmap, "mipmap"); + SettingsWrapBitBoolEx(HDRRendering, "hdr"); + SettingsWrapBitBoolEx(HDROutput, "hdr"); // For now this doesn't have its own separate setting SettingsWrapBitBoolEx(ManualUserHacks, "UserHacks"); SettingsWrapBitBoolEx(UserHacks_AlignSpriteX, "UserHacks_align_sprite_X"); SettingsWrapIntEnumEx(UserHacks_AutoFlush, "UserHacks_AutoFlushLevel"); @@ -966,6 +976,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBoolEx(UserHacks_EstimateTextureRegion, "UserHacks_EstimateTextureRegion"); SettingsWrapBitBoolEx(FXAA, 
"fxaa"); SettingsWrapBitBool(ShadeBoost); + SettingsWrapBitBool(ColorCorrect); SettingsWrapBitBoolEx(DumpGSData, "DumpGSData"); SettingsWrapBitBoolEx(SaveRT, "SaveRT"); SettingsWrapBitBoolEx(SaveFrame, "SaveFrame"); @@ -1029,6 +1040,10 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitfield(ShadeBoost_Brightness); SettingsWrapBitfield(ShadeBoost_Contrast); SettingsWrapBitfield(ShadeBoost_Saturation); + SettingsWrapEntry(ColorCorrect_GameGamma); + SettingsWrapIntEnumEx(ColorCorrect_GameColorSpace, "ColorCorrect_GameColorSpace"); + SettingsWrapEntry(HDR_BrightnessNits); + SettingsWrapEntry(HDR_PeakBrightnessNits); SettingsWrapBitfield(ExclusiveFullscreenControl); SettingsWrapBitfieldEx(PNGCompressionLevel, "png_compression_level"); SettingsWrapBitfieldEx(SaveDrawStart, "SaveDrawStart"); @@ -2042,6 +2057,8 @@ void Pcsx2Config::CopyRuntimeConfig(Pcsx2Config& cfg) CurrentGameArgs = std::move(cfg.CurrentGameArgs); CurrentAspectRatio = cfg.CurrentAspectRatio; CurrentCustomAspectRatio = cfg.CurrentCustomAspectRatio; + HDRRendering = cfg.HDRRendering; + HDROutput = cfg.HDROutput; IsPortableMode = cfg.IsPortableMode; for (u32 i = 0; i < sizeof(Mcd) / sizeof(Mcd[0]); i++) diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 75899116da..9f330c1f7c 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 63; +static constexpr u32 SHADER_CACHE_VERSION = 64; diff --git a/pcsx2/VMManager.cpp b/pcsx2/VMManager.cpp index edcee8faaa..4a87c82312 100644 --- a/pcsx2/VMManager.cpp +++ b/pcsx2/VMManager.cpp @@ -3179,6 +3179,11 @@ void VMManager::WarnAboutUnsafeSettings() append(ICON_FA_IMAGES, TRANSLATE_SV("VMManager", "Mipmapping is disabled. 
This may break rendering in some games.")); } + if (EmuConfig.GS.HDRRendering) + { + append(ICON_FA_IMAGES, + TRANSLATE_SV("VMManager", "HDR rendering is enabled. This may break rendering in some games.")); + } if (EmuConfig.GS.UseDebugDevice) { append(ICON_FA_BUG, diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 07ec5f367c..783369c141 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -76,19 +76,19 @@ - + - + - + @@ -1027,4 +1027,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 367c744acf..9a016cff82 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -354,9 +354,6 @@ System\Ps2\GS\Shaders\OpenGL - - System\Ps2\GS\Shaders\OpenGL - System\Ps2\GS\Shaders\OpenGL @@ -381,15 +378,9 @@ System\Ps2\GS\Shaders\Vulkan - - System\Ps2\GS\Shaders\Vulkan - System\Ps2\GS\Shaders\Direct3D - - System\Ps2\GS\Shaders\Direct3D - System\Ps2\GS\Shaders\Direct3D @@ -408,6 +399,15 @@ System\Ps2\GS\Renderers\Vulkan + + System\Ps2\GS\Shaders\OpenGL + + + System\Ps2\GS\Shaders\Vulkan + + + System\Ps2\GS\Shaders\Direct3D + @@ -2434,4 +2434,4 @@ System\Ps2\GS - + \ No newline at end of file