From f7c79fe3ccc022c8180017602c4fb970334fb81f Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Mon, 21 Nov 2022 00:20:40 +1000 Subject: [PATCH] GS: Implement FidelityFX CAS --- bin/resources/shaders/dx11/cas.hlsl | 75 ++++++++++ bin/resources/shaders/opengl/cas.glsl | 64 +++++++++ bin/resources/shaders/vulkan/cas.glsl | 71 ++++++++++ common/D3D12/Texture.cpp | 137 ++++++++++++++----- common/D3D12/Texture.h | 15 +- pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 3 + pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 64 ++++++++- pcsx2/Config.h | 11 ++ pcsx2/Frontend/D3D12HostDisplay.cpp | 4 +- pcsx2/Frontend/FullscreenUI.cpp | 20 ++- pcsx2/GS/Renderers/Common/GSDevice.cpp | 88 +++++++++--- pcsx2/GS/Renderers/Common/GSDevice.h | 15 ++ pcsx2/GS/Renderers/Common/GSRenderer.cpp | 81 ++++++----- pcsx2/GS/Renderers/Common/GSTexture.h | 1 + pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 58 ++++++++ pcsx2/GS/Renderers/DX11/GSDevice11.h | 12 ++ pcsx2/GS/Renderers/DX11/GSTexture11.cpp | 8 ++ pcsx2/GS/Renderers/DX11/GSTexture11.h | 2 + pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 77 +++++++++++ pcsx2/GS/Renderers/DX12/GSDevice12.h | 11 ++ pcsx2/GS/Renderers/DX12/GSTexture12.cpp | 19 ++- pcsx2/GS/Renderers/DX12/GSTexture12.h | 2 +- pcsx2/GS/Renderers/Metal/GSDeviceMTL.h | 5 + pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 9 ++ pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 74 +++++++++- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h | 11 ++ pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 119 ++++++++++++++++ pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 9 ++ pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp | 18 +++ pcsx2/Pcsx2Config.cpp | 6 + 30 files changed, 976 insertions(+), 113 deletions(-) create mode 100644 bin/resources/shaders/dx11/cas.hlsl create mode 100644 bin/resources/shaders/opengl/cas.glsl create mode 100644 bin/resources/shaders/vulkan/cas.glsl diff --git a/bin/resources/shaders/dx11/cas.hlsl b/bin/resources/shaders/dx11/cas.hlsl new file mode 100644 index 0000000000..50b13c54f6 --- /dev/null +++ 
b/bin/resources/shaders/dx11/cas.hlsl @@ -0,0 +1,75 @@ +// Based on CAS_Shader.hlsl +// +// Copyright(c) 2019 Advanced Micro Devices, Inc.All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +cbuffer cb : register(b0) +{ + uint4 const0; + uint4 const1; + int2 srcOffset; +}; + +Texture2D InputTexture : register(t0); +RWTexture2D OutputTexture : register(u0); + +#define A_GPU 1 +#define A_HLSL 1 + +#include "ffx_a.h" + +AF3 CasLoad(ASU2 p) +{ + return InputTexture.Load(int3(srcOffset, 0) + int3(p, 0)).rgb; +} + +// Lets you transform input from the load into a linear color space between 0 and 1. 
See ffx_cas.h +// In this case, our input is already linear and between 0 and 1 +void CasInput(inout AF1 r, inout AF1 g, inout AF1 b) {} + +#include "ffx_cas.h" + +[numthreads(64, 1, 1)] +void main(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(LocalThreadId.x) + AU2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); + +#if CAS_SHARPEN_ONLY + const bool sharpenOnly = true; +#else + const bool sharpenOnly = false; +#endif + + // Filter. + AF3 c; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly); + OutputTexture[ASU2(gxy)] = AF4(c, 1); + gxy.x += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly); + OutputTexture[ASU2(gxy)] = AF4(c, 1); + gxy.y += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly); + OutputTexture[ASU2(gxy)] = AF4(c, 1); + gxy.x -= 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly); + OutputTexture[ASU2(gxy)] = AF4(c, 1); +} diff --git a/bin/resources/shaders/opengl/cas.glsl b/bin/resources/shaders/opengl/cas.glsl new file mode 100644 index 0000000000..695c0f59b8 --- /dev/null +++ b/bin/resources/shaders/opengl/cas.glsl @@ -0,0 +1,64 @@ +// Based on CAS_Shader.glsl +// Copyright(c) 2019 Advanced Micro Devices, Inc.All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +uniform uvec4 const0; +uniform uvec4 const1; +uniform ivec2 srcOffset; + +layout(binding=0) uniform sampler2D imgSrc; +layout(binding=0, rgba8) uniform writeonly image2D imgDst; + +#define A_GPU 1 +#define A_GLSL 1 + +#include "ffx_a.h" + +AF3 CasLoad(ASU2 p) +{ + return texelFetch(imgSrc, srcOffset + ivec2(p), 0).rgb; +} + +// Lets you transform input from the load into a linear color space between 0 and 1. See ffx_cas.h +// In this case, our input is already linear and between 0 and 1 +void CasInput(inout AF1 r, inout AF1 g, inout AF1 b) {} + +#include "ffx_cas.h" + +layout(local_size_x=64) in; +void main() +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x)+AU2(gl_WorkGroupID.x<<4u,gl_WorkGroupID.y<<4u); + + // Filter. 
+ AF4 c = AF4(0.0, 0.0, 0.0, 1.0); // CasFilter only writes rgb; keep alpha opaque + CasFilter(c.r, c.g, c.b, gxy, const0, const1, CAS_SHARPEN_ONLY); + imageStore(imgDst, ASU2(gxy), c); + gxy.x += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, CAS_SHARPEN_ONLY); + imageStore(imgDst, ASU2(gxy), c); + gxy.y += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, CAS_SHARPEN_ONLY); + imageStore(imgDst, ASU2(gxy), c); + gxy.x -= 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, CAS_SHARPEN_ONLY); + imageStore(imgDst, ASU2(gxy), c); +} diff --git a/bin/resources/shaders/vulkan/cas.glsl b/bin/resources/shaders/vulkan/cas.glsl new file mode 100644 index 0000000000..0e9bdcc98a --- /dev/null +++ b/bin/resources/shaders/vulkan/cas.glsl @@ -0,0 +1,71 @@ +#version 460 core +#extension GL_EXT_samplerless_texture_functions : require + +// Based on CAS_Shader.glsl +// Copyright(c) 2019 Advanced Micro Devices, Inc.All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +layout(push_constant) uniform const_buffer +{ + uvec4 const0; + uvec4 const1; + ivec2 srcOffset; +}; + +layout(set=0, binding=0) uniform texture2D imgSrc; +layout(set=0, binding=1, rgba8) uniform writeonly image2D imgDst; +layout(constant_id=0) const int sharpenOnly = 0; + +#define A_GPU 1 +#define A_GLSL 1 + +#include "ffx_a.h" + +AF3 CasLoad(ASU2 p) +{ + return texelFetch(imgSrc, srcOffset + ivec2(p), 0).rgb; +} + +// Lets you transform input from the load into a linear color space between 0 and 1. See ffx_cas.h +// In this case, our input is already linear and between 0 and 1 +void CasInput(inout AF1 r, inout AF1 g, inout AF1 b) {} + +#include "ffx_cas.h" + +layout(local_size_x=64) in; +void main() +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x)+AU2(gl_WorkGroupID.x<<4u,gl_WorkGroupID.y<<4u); + + // Filter. + AF4 c = AF4(0.0, 0.0, 0.0, 1.0); // CasFilter only writes rgb; keep alpha opaque + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly != 0); + imageStore(imgDst, ASU2(gxy), c); + gxy.x += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly != 0); + imageStore(imgDst, ASU2(gxy), c); + gxy.y += 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly != 0); + imageStore(imgDst, ASU2(gxy), c); + gxy.x -= 8u; + + CasFilter(c.r, c.g, c.b, gxy, const0, const1, sharpenOnly != 0); + imageStore(imgDst, ASU2(gxy), c); +} diff --git a/common/D3D12/Texture.cpp b/common/D3D12/Texture.cpp index 08012c4658..62b5da9b15 100644 --- a/common/D3D12/Texture.cpp +++ b/common/D3D12/Texture.cpp @@ -47,22 +47,22 @@ Texture::Texture(Texture&& texture) : m_resource(std::move(texture.m_resource)) , m_allocation(std::move(texture.m_allocation)) , m_srv_descriptor(texture.m_srv_descriptor) - , m_rtv_or_dsv_descriptor(texture.m_rtv_or_dsv_descriptor) + , m_write_descriptor(texture.m_write_descriptor) , m_width(texture.m_width) , m_height(texture.m_height) , m_levels(texture.m_levels) , m_format(texture.m_format) , m_state(texture.m_state) - , 
m_is_depth_view(texture.m_is_depth_view) + , m_write_descriptor_type(texture.m_write_descriptor_type) { texture.m_srv_descriptor = {}; - texture.m_rtv_or_dsv_descriptor = {}; + texture.m_write_descriptor = {}; texture.m_width = 0; texture.m_height = 0; texture.m_levels = 0; texture.m_format = DXGI_FORMAT_UNKNOWN; texture.m_state = D3D12_RESOURCE_STATE_COMMON; - texture.m_is_depth_view = false; + texture.m_write_descriptor_type = WriteDescriptorType::None; } Texture::~Texture() @@ -76,21 +76,21 @@ Texture& Texture::operator=(Texture&& texture) m_resource = std::move(texture.m_resource); m_allocation = std::move(texture.m_allocation); m_srv_descriptor = texture.m_srv_descriptor; - m_rtv_or_dsv_descriptor = texture.m_rtv_or_dsv_descriptor; + m_write_descriptor = texture.m_write_descriptor; m_width = texture.m_width; m_height = texture.m_height; m_levels = texture.m_levels; m_format = texture.m_format; m_state = texture.m_state; - m_is_depth_view = texture.m_is_depth_view; + m_write_descriptor_type = texture.m_write_descriptor_type; texture.m_srv_descriptor = {}; - texture.m_rtv_or_dsv_descriptor = {}; + texture.m_write_descriptor = {}; texture.m_width = 0; texture.m_height = 0; texture.m_levels = 0; texture.m_format = DXGI_FORMAT_UNKNOWN; texture.m_state = D3D12_RESOURCE_STATE_COMMON; - texture.m_is_depth_view = false; + texture.m_write_descriptor_type = WriteDescriptorType::None; return *this; } @@ -149,8 +149,8 @@ bool Texture::Create(u32 width, u32 height, u32 levels, DXGI_FORMAT format, DXGI return false; } - DescriptorHandle srv_descriptor, rtv_descriptor; - bool is_depth_view = false; + DescriptorHandle srv_descriptor, write_descriptor; + WriteDescriptorType write_descriptor_type = WriteDescriptorType::None; if (srv_format != DXGI_FORMAT_UNKNOWN) { if (!CreateSRVDescriptor(resource.get(), levels, srv_format, &srv_descriptor)) @@ -159,22 +159,32 @@ bool Texture::Create(u32 width, u32 height, u32 levels, DXGI_FORMAT format, DXGI if (rtv_format != 
DXGI_FORMAT_UNKNOWN) { - pxAssert(dsv_format == DXGI_FORMAT_UNKNOWN); - if (!CreateRTVDescriptor(resource.get(), rtv_format, &rtv_descriptor)) + pxAssert(dsv_format == DXGI_FORMAT_UNKNOWN && !(flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)); + write_descriptor_type = Texture::WriteDescriptorType::RTV; + if (!CreateRTVDescriptor(resource.get(), rtv_format, &write_descriptor)) { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); - return false; - } - } - else if (dsv_format != DXGI_FORMAT_UNKNOWN) - { - if (!CreateDSVDescriptor(resource.get(), dsv_format, &rtv_descriptor)) - { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); return false; } - is_depth_view = true; + } + else if (dsv_format != DXGI_FORMAT_UNKNOWN && !(flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)) + { + write_descriptor_type = Texture::WriteDescriptorType::DSV; + if (!CreateDSVDescriptor(resource.get(), dsv_format, &write_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + } + else if (flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + { + write_descriptor_type = Texture::WriteDescriptorType::UAV; + if (!CreateUAVDescriptor(resource.get(), dsv_format, &write_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } } Destroy(true); @@ -182,13 +192,13 @@ bool Texture::Create(u32 width, u32 height, u32 levels, DXGI_FORMAT format, DXGI m_resource = std::move(resource); m_allocation = std::move(allocation); m_srv_descriptor = std::move(srv_descriptor); - m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); + m_write_descriptor = std::move(write_descriptor); m_width = width; m_height = height; m_levels = levels; m_format = format; m_state = state; - m_is_depth_view = is_depth_view; + m_write_descriptor_type = write_descriptor_type; return true; } @@ -197,7 +207,8 @@ bool Texture::Adopt(ComPtr 
texture, DXGI_FORMAT srv_format, DXGI { const D3D12_RESOURCE_DESC desc(texture->GetDesc()); - DescriptorHandle srv_descriptor, rtv_descriptor; + DescriptorHandle srv_descriptor, write_descriptor; + WriteDescriptorType write_descriptor_type = WriteDescriptorType::None; if (srv_format != DXGI_FORMAT_UNKNOWN) { if (!CreateSRVDescriptor(texture.get(), desc.MipLevels, srv_format, &srv_descriptor)) @@ -207,15 +218,26 @@ bool Texture::Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI if (rtv_format != DXGI_FORMAT_UNKNOWN) { pxAssert(dsv_format == DXGI_FORMAT_UNKNOWN); - if (!CreateRTVDescriptor(texture.get(), rtv_format, &rtv_descriptor)) + write_descriptor_type = Texture::WriteDescriptorType::RTV; + if (!CreateRTVDescriptor(texture.get(), rtv_format, &write_descriptor)) { - g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); return false; } } else if (dsv_format != DXGI_FORMAT_UNKNOWN) { - if (!CreateDSVDescriptor(texture.get(), dsv_format, &rtv_descriptor)) + write_descriptor_type = Texture::WriteDescriptorType::DSV; + if (!CreateDSVDescriptor(texture.get(), dsv_format, &write_descriptor)) + { + g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); + return false; + } + } + else if (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) + { + write_descriptor_type = Texture::WriteDescriptorType::UAV; + if (!CreateUAVDescriptor(texture.get(), srv_format, &write_descriptor)) { g_d3d12_context->GetDescriptorHeapManager().Free(&srv_descriptor); return false; @@ -225,7 +247,8 @@ bool Texture::Adopt(ComPtr texture, DXGI_FORMAT srv_format, DXGI m_resource = std::move(texture); m_allocation.reset(); m_srv_descriptor = std::move(srv_descriptor); - m_rtv_or_dsv_descriptor = std::move(rtv_descriptor); + m_write_descriptor = std::move(write_descriptor); + m_write_descriptor_type = write_descriptor_type; m_width = static_cast(desc.Width); m_height = desc.Height; m_levels = desc.MipLevels; @@ 
-239,10 +262,23 @@ void Texture::Destroy(bool defer /* = true */) if (defer) { g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDescriptorHeapManager(), &m_srv_descriptor); - if (m_is_depth_view) - g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDSVHeapManager(), &m_rtv_or_dsv_descriptor); - else - g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetRTVHeapManager(), &m_rtv_or_dsv_descriptor); + + switch (m_write_descriptor_type) + { + case Texture::WriteDescriptorType::RTV: + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetRTVHeapManager(), &m_write_descriptor); + break; + case Texture::WriteDescriptorType::DSV: + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDSVHeapManager(), &m_write_descriptor); + break; + case Texture::WriteDescriptorType::UAV: + g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetDescriptorHeapManager(), &m_write_descriptor); + break; + case Texture::WriteDescriptorType::None: + default: + break; + } + g_d3d12_context->DeferResourceDestruction(m_allocation.get(), m_resource.get()); m_resource.reset(); m_allocation.reset(); @@ -250,10 +286,22 @@ void Texture::Destroy(bool defer /* = true */) else { g_d3d12_context->GetDescriptorHeapManager().Free(&m_srv_descriptor); - if (m_is_depth_view) - g_d3d12_context->GetDSVHeapManager().Free(&m_rtv_or_dsv_descriptor); - else - g_d3d12_context->GetRTVHeapManager().Free(&m_rtv_or_dsv_descriptor); + + switch (m_write_descriptor_type) + { + case Texture::WriteDescriptorType::RTV: + g_d3d12_context->GetRTVHeapManager().Free(&m_write_descriptor); + break; + case Texture::WriteDescriptorType::DSV: + g_d3d12_context->GetDSVHeapManager().Free(&m_write_descriptor); + break; + case Texture::WriteDescriptorType::UAV: + g_d3d12_context->GetDescriptorHeapManager().Free(&m_write_descriptor); + break; + case Texture::WriteDescriptorType::None: + default: + break; + } m_resource.reset(); m_allocation.reset(); @@ -263,7 +311,7 @@ 
void Texture::Destroy(bool defer /* = true */) m_height = 0; m_levels = 0; m_format = DXGI_FORMAT_UNKNOWN; - m_is_depth_view = false; + m_write_descriptor_type = WriteDescriptorType::None; } void Texture::TransitionToState(ID3D12GraphicsCommandList* cmdlist, D3D12_RESOURCE_STATES state) @@ -448,3 +496,16 @@ bool Texture::CreateDSVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, g_d3d12_context->GetDevice()->CreateDepthStencilView(resource, &desc, dh->cpu_handle); return true; } + +bool Texture::CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, DescriptorHandle* dh) +{ + if (!g_d3d12_context->GetDescriptorHeapManager().Allocate(dh)) + { + Console.Error("Failed to allocate UAV descriptor"); + return false; + } + + const D3D12_UNORDERED_ACCESS_VIEW_DESC desc = {format, D3D12_UAV_DIMENSION_TEXTURE2D}; + g_d3d12_context->GetDevice()->CreateUnorderedAccessView(resource, nullptr, &desc, dh->cpu_handle); + return true; +} \ No newline at end of file diff --git a/common/D3D12/Texture.h b/common/D3D12/Texture.h index 1e1a608110..8f936cd195 100644 --- a/common/D3D12/Texture.h +++ b/common/D3D12/Texture.h @@ -46,7 +46,7 @@ namespace D3D12 __fi ID3D12Resource* GetResource() const { return m_resource.get(); } __fi D3D12MA::Allocation* GetAllocation() const { return m_allocation.get(); } __fi const DescriptorHandle& GetSRVDescriptor() const { return m_srv_descriptor; } - __fi const DescriptorHandle& GetRTVOrDSVDescriptor() const { return m_rtv_or_dsv_descriptor; } + __fi const DescriptorHandle& GetWriteDescriptor() const { return m_write_descriptor; } __fi D3D12_RESOURCE_STATES GetState() const { return m_state; } __fi u32 GetWidth() const { return m_width; } @@ -83,11 +83,20 @@ namespace D3D12 static bool CreateSRVDescriptor(ID3D12Resource* resource, u32 levels, DXGI_FORMAT format, DescriptorHandle* dh); static bool CreateRTVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, DescriptorHandle* dh); static bool CreateDSVDescriptor(ID3D12Resource* 
resource, DXGI_FORMAT format, DescriptorHandle* dh); + static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, DescriptorHandle* dh); + + enum class WriteDescriptorType : u8 + { + None, + RTV, + DSV, + UAV + }; ComPtr m_resource; ComPtr m_allocation; DescriptorHandle m_srv_descriptor = {}; - DescriptorHandle m_rtv_or_dsv_descriptor = {}; + DescriptorHandle m_write_descriptor = {}; u32 m_width = 0; u32 m_height = 0; u32 m_levels = 0; @@ -95,6 +104,6 @@ namespace D3D12 D3D12_RESOURCE_STATES m_state = D3D12_RESOURCE_STATE_COMMON; - bool m_is_depth_view = false; + WriteDescriptorType m_write_descriptor_type = WriteDescriptorType::None; }; } // namespace D3D12 \ No newline at end of file diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index 94dbeef163..366d8409c7 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -77,6 +77,7 @@ static const char* s_anisotropic_filtering_values[] = {"0", "2", "4", "8", "16", static constexpr int DEFAULT_INTERLACE_MODE = 0; static constexpr int DEFAULT_TV_SHADER_MODE = 0; +static constexpr int DEFAULT_CAS_SHARPNESS = 50; GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* parent) : QWidget(parent) @@ -148,6 +149,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.shadeBoostContrast, "EmuCore/GS", "ShadeBoost_Contrast", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.shadeBoostSaturation, "EmuCore/GS", "ShadeBoost_Saturation", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.tvShader, "EmuCore/GS", "TVShader", DEFAULT_TV_SHADER_MODE); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.casMode, "EmuCore/GS", "CASMode", static_cast(GSCASMode::Disabled)); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.casSharpness, "EmuCore/GS", "CASSharpness", 
DEFAULT_CAS_SHARPNESS); connect(m_ui.shadeBoost, QOverload::of(&QCheckBox::stateChanged), this, &GraphicsSettingsWidget::onShadeBoostChanged); onShadeBoostChanged(); diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index f4bbfdcffe..939d650833 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -1239,15 +1239,69 @@ Post-Processing - + + + + CAS Mode: + + + + + + + + + + Disabled + + + + + Sharpen Only (Internal Resolution) + + + + + Sharpen and Upscale (Display Resolution) + + + + + + + + + + Sharpness: + + + + + + + % + + + 100 + + + 50 + + + + + + + + Shade Boost - + @@ -1315,21 +1369,21 @@ - + FXAA - + TV Shader - + diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 4809ee5589..c79b1e069e 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -211,6 +211,13 @@ enum class GSHardwareDownloadMode : u8 Disabled }; +enum class GSCASMode : u8 +{ + Disabled, + SharpenOnly, + SharpenAndResize, +}; + // Template function for casting enumerations to their underlying type template typename std::underlying_type::type enum_cast(Enumeration E) @@ -518,7 +525,9 @@ struct Pcsx2Config UserHacks_TextureInsideRt : 1, FXAA : 1, ShadeBoost : 1, +#ifndef PCSX2_CORE ShaderFX : 1, +#endif DumpGSData : 1, SaveRT : 1, SaveFrame : 1, @@ -573,6 +582,7 @@ struct Pcsx2Config TexturePreloadingLevel TexturePreloading{TexturePreloadingLevel::Full}; GSDumpCompressionMethod GSDumpCompression{GSDumpCompressionMethod::Zstandard}; GSHardwareDownloadMode HWDownloadMode{GSHardwareDownloadMode::Enabled}; + GSCASMode CASMode{GSCASMode::Disabled}; int Dithering{2}; int MaxAnisotropy{0}; int SWExtraThreads{2}; @@ -592,6 +602,7 @@ struct Pcsx2Config int OverrideTextureBarriers{-1}; int OverrideGeometryShaders{-1}; + int CAS_Sharpness{50}; int ShadeBoost_Brightness{50}; int ShadeBoost_Contrast{50}; int ShadeBoost_Saturation{50}; diff --git a/pcsx2/Frontend/D3D12HostDisplay.cpp 
b/pcsx2/Frontend/D3D12HostDisplay.cpp index f42a09660a..e4a0830395 100644 --- a/pcsx2/Frontend/D3D12HostDisplay.cpp +++ b/pcsx2/Frontend/D3D12HostDisplay.cpp @@ -567,8 +567,8 @@ bool D3D12HostDisplay::BeginPresent(bool frame_skip) ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); swap_chain_buf.TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(swap_chain_buf.GetRTVOrDSVDescriptor(), clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &swap_chain_buf.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); + cmdlist->ClearRenderTargetView(swap_chain_buf.GetWriteDescriptor(), clear_color.data(), 0, nullptr); + cmdlist->OMSetRenderTargets(1, &swap_chain_buf.GetWriteDescriptor().cpu_handle, FALSE, nullptr); const D3D12_VIEWPORT vp{0.0f, 0.0f, static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height), 0.0f, 1.0f}; const D3D12_RECT scissor{0, 0, static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height)}; diff --git a/pcsx2/Frontend/FullscreenUI.cpp b/pcsx2/Frontend/FullscreenUI.cpp index 7fb235acb2..f017659860 100644 --- a/pcsx2/Frontend/FullscreenUI.cpp +++ b/pcsx2/Frontend/FullscreenUI.cpp @@ -2795,7 +2795,7 @@ void FullscreenUI::DrawGraphicsSettingsPage() static constexpr const char* s_vsync_values[] = {"Off", "On", "Adaptive"}; static constexpr const char* s_deinterlacing_options[] = {"Automatic (Default)", "None", "Weave (Top Field First, Sawtooth)", "Weave (Bottom Field First, Sawtooth)", "Bob (Top Field First)", "Bob (Bottom Field First)", "Blend (Top Field First, Half FPS)", - "Blend (Bottom Field First, Half FPS)", "Adaptive (Top Field First", "Adaptive (Bottom Field First)"}; + "Blend (Bottom Field First, Half FPS)", "Adaptive (Top Field First)", "Adaptive (Bottom Field First)"}; static const char* s_resolution_options[] = { "Native (PS2)", "1.25x Native", @@ -3032,8 +3032,21 @@ void FullscreenUI::DrawGraphicsSettingsPage() 
MenuHeading("Post-Processing"); { - const bool shadeboost_active = GetEffectiveBoolSetting(bsi, "EmuCore/GS", "ShadeBoost", false); + static constexpr const char* s_cas_options[] = { + "Disabled", "Sharpen Only (Internal Resolution)", "Sharpen and Resize (Display Resolution)"}; + const bool cas_active = (GetEffectiveIntSetting(bsi, "EmuCore/GS", "CASMode", 0) != static_cast(GSCASMode::Disabled)); + DrawToggleSetting(bsi, "FXAA", "Enables FXAA post-processing shader.", "EmuCore/GS", "fxaa", false); + DrawIntListSetting(bsi, "Contrast Adaptive Sharpening", "Enables FidelityFX Contrast Adaptive Sharpening.", "EmuCore/GS", "CASMode", + static_cast(GSCASMode::Disabled), s_cas_options, std::size(s_cas_options)); + DrawIntSpinBoxSetting(bsi, "CAS Sharpness", "Determines the intensity of the sharpening effect in CAS post-processing.", "EmuCore/GS", + "CASSharpness", 50, 0, 100, 1, "%d%%", cas_active); + } + + MenuHeading("Filters"); + { + const bool shadeboost_active = GetEffectiveBoolSetting(bsi, "EmuCore/GS", "ShadeBoost", false); + DrawToggleSetting(bsi, "Shade Boost", "Enables brightness/contrast/saturation adjustment.", "EmuCore/GS", "ShadeBoost", false); DrawIntRangeSetting(bsi, "Shade Boost Brightness", "Adjusts brightness. 
50 is normal.", "EmuCore/GS", "ShadeBoost_Brightness", 50, 1, 100, "%d", shadeboost_active); @@ -3847,8 +3860,7 @@ void FullscreenUI::DrawGameFixesSettingsPage() DrawToggleSetting(bsi, "EE Timing Hack", "Known to affect following games: Digital Devil Saga (Fixes FMV and crashes), SSX (Fixes bad graphics and crashes).", "EmuCore/Gamefixes", "EETimingHack", false); - DrawToggleSetting(bsi, "Instant DMA Hack", - "Known to affect following games: Fire Pro Wrestling Z (Bad ring graphics).", + DrawToggleSetting(bsi, "Instant DMA Hack", "Known to affect following games: Fire Pro Wrestling Z (Bad ring graphics).", "EmuCore/Gamefixes", "InstantDMAHack", false); DrawToggleSetting(bsi, "Handle DMAC writes when it is busy.", "Known to affect following games: Mana Khemia 1 (Going \"off campus\"), Metal Saga (Intro FMV), Pilot Down Behind Enemy Lines.", diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 14b928c08c..4f841a7bdf 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -17,6 +17,8 @@ #include "GSDevice.h" #include "GS/GSGL.h" #include "GS/GS.h" +#include "Host.h" +#include "common/StringUtil.h" const char* shaderName(ShaderConvert value) { @@ -81,13 +83,8 @@ GSDevice::GSDevice() = default; GSDevice::~GSDevice() { - PurgePool(); - - delete m_merge; - delete m_weavebob; - delete m_blend; - delete m_mad; - delete m_target_tmp; + // should've been cleaned up in Destroy() + pxAssert(m_pool.empty() && !m_merge && !m_weavebob && !m_blend && !m_mad && !m_target_tmp && !m_cas); } bool GSDevice::Create() @@ -97,21 +94,8 @@ bool GSDevice::Create() void GSDevice::Destroy() { + ClearCurrent(); PurgePool(); - - delete m_merge; - delete m_weavebob; - delete m_blend; - delete m_mad; - delete m_target_tmp; - - m_merge = nullptr; - m_weavebob = nullptr; - m_blend = nullptr; - m_mad = nullptr; - m_target_tmp = nullptr; - - m_current = nullptr; // current is special, points to other textures, no 
need to delete } void GSDevice::ResetAPIState() @@ -317,12 +301,14 @@ void GSDevice::ClearCurrent() delete m_blend; delete m_mad; delete m_target_tmp; + delete m_cas; m_merge = nullptr; m_weavebob = nullptr; m_blend = nullptr; m_mad = nullptr; m_target_tmp = nullptr; + m_cas = nullptr; } void GSDevice::Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) @@ -403,6 +389,8 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse } } +#ifndef PCSX2_CORE + void GSDevice::ExternalFX() { const GSVector2i s = m_current->GetSize(); @@ -417,6 +405,8 @@ void GSDevice::ExternalFX() } } +#endif + void GSDevice::FXAA() { const GSVector2i s = m_current->GetSize(); @@ -519,6 +509,62 @@ void GSDevice::SetHWDrawConfigForAlphaPass(GSHWDrawConfig::PSSelector* ps, } } +// Kinda grotty, but better than copy/pasting the relevant bits in.. +#define A_CPU 1 +#include "bin/resources/shaders/common/ffx_a.h" +#include "bin/resources/shaders/common/ffx_cas.h" + +bool GSDevice::GetCASShaderSource(std::string* source) +{ + std::optional ffx_a_source(Host::ReadResourceFileToString("shaders/common/ffx_a.h")); + std::optional ffx_cas_source(Host::ReadResourceFileToString("shaders/common/ffx_cas.h")); + if (!ffx_a_source.has_value() || !ffx_cas_source.has_value()) + return false; + + // Since our shader compilers don't support includes, and OpenGL doesn't at all... we'll do a really cheeky string replace. + StringUtil::ReplaceAll(source, "#include \"ffx_a.h\"", ffx_a_source.value()); + StringUtil::ReplaceAll(source, "#include \"ffx_cas.h\"", ffx_cas_source.value()); + return true; +} + +void GSDevice::CAS(GSTexture*& tex, GSVector4i& src_rect, GSVector4& src_uv, const GSVector4& draw_rect, bool sharpen_only) +{ + const int dst_width = sharpen_only ? src_rect.width() : static_cast(std::ceil(draw_rect.z - draw_rect.x)); + const int dst_height = sharpen_only ? 
src_rect.height() : static_cast(std::ceil(draw_rect.w - draw_rect.y)); + const int src_offset_x = static_cast(src_rect.x); + const int src_offset_y = static_cast(src_rect.y); + + GSTexture* src_tex = tex; + if (!m_cas || m_cas->GetWidth() != dst_width || m_cas->GetHeight() != dst_height) + { + delete m_cas; + m_cas = CreateSurface(GSTexture::Type::RWTexture, dst_width, dst_height, 1, GSTexture::Format::Color); + if (!m_cas) + { + Console.Error("Failed to allocate CAS RW texture."); + return; + } + } + + std::array consts = {}; // zero the trailing padding entries, which are never written below + CasSetup(&consts[0], &consts[4], static_cast(GSConfig.CAS_Sharpness) * 0.01f, + static_cast(src_rect.width()), static_cast(src_rect.height()), + static_cast(dst_width), static_cast(dst_height)); + consts[8] = static_cast(src_offset_x); + consts[9] = static_cast(src_offset_y); + + if (!DoCAS(src_tex, m_cas, sharpen_only, consts)) + { + // leave textures intact if we failed + Console.Warning("Applying CAS failed."); + return; + } + + tex = m_cas; + src_rect = GSVector4i(0, 0, dst_width, dst_height); + src_uv = GSVector4(0.0f, 0.0f, 1.0f, 1.0f); +} + GSAdapter::operator std::string() const { char buf[sizeof "12345678:12345678:12345678:12345678"]; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index dca0e7082c..a5b2478b4d 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -710,6 +710,7 @@ public: bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers. bool dual_source_blend : 1; ///< Can use alpha output as a blend factor. bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE. + bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening. 
FeatureSupport() { memset(this, 0, sizeof(*this)); @@ -740,6 +741,7 @@ protected: static constexpr int NUM_INTERLACE_SHADERS = 5; static constexpr float MAD_SENSITIVITY = 0.08f; static constexpr u32 MAX_POOLED_TEXTURES = 300; + static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment GSTexture* m_merge = nullptr; GSTexture* m_weavebob = nullptr; @@ -747,6 +749,7 @@ protected: GSTexture* m_mad = nullptr; GSTexture* m_target_tmp = nullptr; GSTexture* m_current = nullptr; + GSTexture* m_cas = nullptr; struct { size_t stride, start, count, limit; @@ -766,7 +769,15 @@ protected: virtual void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset, int bufIdx) = 0; virtual void DoFXAA(GSTexture* sTex, GSTexture* dTex) {} virtual void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) {} +#ifndef PCSX2_CORE virtual void DoExternalFX(GSTexture* sTex, GSTexture* dTex) {} +#endif + + /// Resolves CAS shader includes for the specified source. + static bool GetCASShaderSource(std::string* source); + + /// Applies CAS and writes to the destination texture, which should be a RWTexture. 
+ virtual bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) = 0; public: GSDevice(); @@ -847,7 +858,11 @@ public: void Interlace(const GSVector2i& ds, int field, int mode, float yoffset); void FXAA(); void ShadeBoost(); +#ifndef PCSX2_CORE void ExternalFX(); +#endif + + void CAS(GSTexture*& tex, GSVector4i& src_rect, GSVector4& src_uv, const GSVector4& draw_rect, bool sharpen_only); bool ResizeTexture(GSTexture** t, GSTexture::Type type, int w, int h, bool clear = true, bool prefer_reuse = false); bool ResizeTexture(GSTexture** t, int w, int h, bool prefer_reuse = false); diff --git a/pcsx2/GS/Renderers/Common/GSRenderer.cpp b/pcsx2/GS/Renderers/Common/GSRenderer.cpp index 47e2b62748..1fc225becd 100644 --- a/pcsx2/GS/Renderers/Common/GSRenderer.cpp +++ b/pcsx2/GS/Renderers/Common/GSRenderer.cpp @@ -375,38 +375,34 @@ bool GSRenderer::Merge(int field) m_real_size = GSVector2i(fs.x, is_interlaced_resolution ? ds.y : fs.y); - if (tex[0] || tex[1]) + if (!tex[0] && !tex[1]) + return false; + + if ((tex[0] == tex[1]) && (src_out_rect[0] == src_out_rect[1]).alltrue() && (dst[0] == dst[1]).alltrue() && !feedback_merge && !slbg) { - if ((tex[0] == tex[1]) && (src_out_rect[0] == src_out_rect[1]).alltrue() && (dst[0] == dst[1]).alltrue() && !feedback_merge && !slbg) - { - // the two outputs are identical, skip drawing one of them (the one that is alpha blended) + // the two outputs are identical, skip drawing one of them (the one that is alpha blended) - tex[0] = NULL; - } - - GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; - - g_gs_device->Merge(tex, src_gs_read, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); - - if (isReallyInterlaced() && GSConfig.InterlaceMode != GSInterlaceMode::Off) - g_gs_device->Interlace(ds, field ^ field2, mode, offset); - - if (GSConfig.ShadeBoost) - { - g_gs_device->ShadeBoost(); - } - - if (GSConfig.ShaderFX) - { - 
g_gs_device->ExternalFX(); - } - - if (GSConfig.FXAA) - { - g_gs_device->FXAA(); - } + tex[0] = NULL; } + GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; + + g_gs_device->Merge(tex, src_gs_read, dst, fs, m_regs->PMODE, m_regs->EXTBUF, c); + + if (isReallyInterlaced() && GSConfig.InterlaceMode != GSInterlaceMode::Off) + g_gs_device->Interlace(ds, field ^ field2, mode, offset); + + if (GSConfig.ShadeBoost) + g_gs_device->ShadeBoost(); + +#ifndef PCSX2_CORE + if (GSConfig.ShaderFX) + g_gs_device->ExternalFX(); +#endif + + if (GSConfig.FXAA) + g_gs_device->FXAA(); + if (m_scanmask_used) m_scanmask_used--; @@ -616,18 +612,33 @@ void GSRenderer::VSync(u32 field, bool registers_written) if ((g_perfmon.GetFrame() & 0x1f) == 0) g_perfmon.Update(); + // Little bit ugly, but we can't do CAS inside the render pass. + GSVector4i src_rect; + GSVector4 src_uv, draw_rect; + GSTexture* current = g_gs_device->GetCurrent(); + if (current && !blank_frame) + { + src_rect = CalculateDrawSrcRect(current); + src_uv = GSVector4(src_rect) / GSVector4(current->GetSize()).xyxy(); + draw_rect = CalculateDrawDstRect(g_host_display->GetWindowWidth(), g_host_display->GetWindowHeight(), + src_rect, current->GetSize(), g_host_display->GetDisplayAlignment(), g_host_display->UsesLowerLeftOrigin(), + GetVideoMode() == GSVideoMode::SDTV_480P || (GSConfig.PCRTCOverscan && GSConfig.PCRTCOffsets)); + + if (GSConfig.CASMode != GSCASMode::Disabled && g_gs_device->Features().cas_sharpening) + { + // sharpen only if the IR is higher than the display resolution + const bool sharpen_only = (GSConfig.CASMode == GSCASMode::SharpenOnly || + (current->GetWidth() > g_host_display->GetWindowWidth() && + current->GetHeight() > g_host_display->GetWindowHeight())); + g_gs_device->CAS(current, src_rect, src_uv, draw_rect, sharpen_only); + } + } + g_gs_device->ResetAPIState(); if (Host::BeginPresentFrame(false)) { - GSTexture* current = 
g_gs_device->GetCurrent(); if (current && !blank_frame) { - const GSVector4i src_rect(CalculateDrawSrcRect(current)); - const GSVector4 src_uv(GSVector4(src_rect) / GSVector4(current->GetSize()).xyxy()); - const GSVector4 draw_rect(CalculateDrawDstRect(g_host_display->GetWindowWidth(), g_host_display->GetWindowHeight(), - src_rect, current->GetSize(), g_host_display->GetDisplayAlignment(), g_host_display->UsesLowerLeftOrigin(), - GetVideoMode() == GSVideoMode::SDTV_480P || (GSConfig.PCRTCOverscan && GSConfig.PCRTCOffsets))); - const u64 current_time = Common::Timer::GetCurrentValue(); const float shader_time = static_cast(Common::Timer::ConvertValueToSeconds(current_time - m_shader_time_start)); diff --git a/pcsx2/GS/Renderers/Common/GSTexture.h b/pcsx2/GS/Renderers/Common/GSTexture.h index 6b22539d49..30aa682b70 100644 --- a/pcsx2/GS/Renderers/Common/GSTexture.h +++ b/pcsx2/GS/Renderers/Common/GSTexture.h @@ -33,6 +33,7 @@ public: DepthStencil, Texture, Offscreen, + RWTexture, }; enum class Format : u8 diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 99a9f04784..875d0f3dfb 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -23,6 +23,7 @@ #include "Host.h" #include "HostDisplay.h" #include "ShaderCacheVersion.h" +#include "common/Path.h" #include "common/StringUtil.h" #include #include @@ -350,6 +351,7 @@ bool GSDevice11::Create() return false; } + CreateCASShaders(); return true; } @@ -507,6 +509,9 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height desc.Usage = D3D11_USAGE_STAGING; desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; break; + case GSTexture::Type::RWTexture: + desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + break; } GSTexture11* t = nullptr; @@ -839,6 +844,8 @@ void GSDevice11::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool StretchRect(sTex, sRect, dTex, 
dRect, m_interlace.ps[shader].get(), m_interlace.cb.get(), linear); } +#ifndef PCSX2_CORE + void GSDevice11::DoExternalFX(GSTexture* sTex, GSTexture* dTex) { const GSVector2i s = dTex->GetSize(); @@ -885,6 +892,8 @@ void GSDevice11::DoExternalFX(GSTexture* sTex, GSTexture* dTex) StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps.get(), m_shaderfx.cb.get(), true); } +#endif + void GSDevice11::DoFXAA(GSTexture* sTex, GSTexture* dTex) { const GSVector2i s = dTex->GetSize(); @@ -925,6 +934,55 @@ void GSDevice11::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float para StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps.get(), m_shadeboost.cb.get(), true); } +bool GSDevice11::CreateCASShaders() +{ + CD3D11_BUFFER_DESC desc(NUM_CAS_CONSTANTS * sizeof(u32), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + HRESULT hr = m_dev->CreateBuffer(&desc, nullptr, m_cas.cb.put()); + if (FAILED(hr)) + return false; + + std::optional cas_source(Host::ReadResourceFileToString("shaders/dx11/cas.hlsl")); + if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value())) + return false; + + static constexpr D3D_SHADER_MACRO sharpen_only_macros[] = { + {"CAS_SHARPEN_ONLY", "1"}, + {nullptr, nullptr}}; + + m_cas.cs_sharpen = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), sharpen_only_macros, "main"); + m_cas.cs_upscale = m_shader_cache.GetComputeShader(m_dev.get(), cas_source.value(), nullptr, "main"); + if (!m_cas.cs_sharpen || !m_cas.cs_upscale) + return false; + + m_features.cas_sharpening = true; + return true; +} + +bool GSDevice11::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) +{ + static const int threadGroupWorkRegionDim = 16; + const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + ID3D11ShaderResourceView* srvs[1] = {*static_cast(sTex)}; + 
ID3D11UnorderedAccessView* uavs[1] = {*static_cast(dTex)}; + m_ctx->OMSetRenderTargets(0, nullptr, nullptr); + m_ctx->UpdateSubresource(m_cas.cb.get(), 0, nullptr, constants.data(), 0, 0); + m_ctx->CSSetConstantBuffers(0, 1, m_cas.cb.addressof()); + m_ctx->CSSetShader(sharpen_only ? m_cas.cs_sharpen.get() : m_cas.cs_upscale.get(), nullptr, 0); + m_ctx->CSSetShaderResources(0, std::size(srvs), srvs); + m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr); + m_ctx->Dispatch(dispatchX, dispatchY, 1); + + // clear bindings out to prevent hazards + uavs[0] = nullptr; + srvs[0] = nullptr; + m_ctx->CSSetShaderResources(0, std::size(srvs), srvs); + m_ctx->CSSetUnorderedAccessViews(0, std::size(uavs), uavs, nullptr); + + return true; +} + void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.h b/pcsx2/GS/Renderers/DX11/GSDevice11.h index de1b240a3f..66b88214f8 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.h +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.h @@ -122,7 +122,12 @@ private: void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0, int bufIdx = 0) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; +#ifndef PCSX2_CORE void DoExternalFX(GSTexture* sTex, GSTexture* dTex) final; +#endif + + bool CreateCASShaders(); + bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) final; wil::com_ptr_nothrow m_dev; wil::com_ptr_nothrow m_ctx; @@ -210,6 +215,13 @@ private: wil::com_ptr_nothrow primid_init_ps[2]; } m_date; + struct + { + wil::com_ptr_nothrow cb; + wil::com_ptr_nothrow cs_upscale; + wil::com_ptr_nothrow cs_sharpen; + } m_cas; + // Shaders... 
std::unordered_map m_vs; diff --git a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp index 811c44792a..b54b92dbbe 100644 --- a/pcsx2/GS/Renderers/DX11/GSTexture11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTexture11.cpp @@ -258,6 +258,14 @@ GSTexture11::operator ID3D11DepthStencilView*() return m_dsv.get(); } +GSTexture11::operator ID3D11UnorderedAccessView*() +{ + if (!m_uav) + GSDevice11::GetInstance()->GetD3DDevice()->CreateUnorderedAccessView(m_texture.get(), nullptr, m_uav.put()); + + return m_uav.get(); +} + bool GSTexture11::Equal(GSTexture11* tex) { return tex && m_texture == tex->m_texture; diff --git a/pcsx2/GS/Renderers/DX11/GSTexture11.h b/pcsx2/GS/Renderers/DX11/GSTexture11.h index 3bb2211ca6..7f4edf3404 100644 --- a/pcsx2/GS/Renderers/DX11/GSTexture11.h +++ b/pcsx2/GS/Renderers/DX11/GSTexture11.h @@ -26,6 +26,7 @@ class GSTexture11 final : public GSTexture wil::com_ptr_nothrow m_srv; wil::com_ptr_nothrow m_rtv; wil::com_ptr_nothrow m_dsv; + wil::com_ptr_nothrow m_uav; D3D11_TEXTURE2D_DESC m_desc; int m_mapped_subresource; @@ -47,4 +48,5 @@ public: operator ID3D11ShaderResourceView*(); operator ID3D11RenderTargetView*(); operator ID3D11DepthStencilView*(); + operator ID3D11UnorderedAccessView*(); }; diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 801954e823..f788d400db 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -156,6 +156,7 @@ bool GSDevice12::Create() return false; } + CompileCASPipelines(); InitializeState(); InitializeSamplers(); return true; @@ -787,6 +788,82 @@ void GSDevice12::DoFXAA(GSTexture* sTex, GSTexture* dTex) static_cast(dTex)->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } +bool GSDevice12::CompileCASPipelines() +{ + D3D12::RootSignatureBuilder rsb; + rsb.Add32BitConstants(0, NUM_CAS_CONSTANTS, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 
1, D3D12_SHADER_VISIBILITY_ALL); + rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, 1, D3D12_SHADER_VISIBILITY_ALL); + m_cas_root_signature = rsb.Create(false); + if (!m_cas_root_signature) + return false; + + std::optional cas_source(Host::ReadResourceFileToString("shaders/dx11/cas.hlsl")); + if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value())) + return false; + + static constexpr D3D_SHADER_MACRO sharpen_only_macros[] = { + {"CAS_SHARPEN_ONLY", "1"}, + {nullptr, nullptr}}; + + const ComPtr cs_upscale(m_shader_cache.GetComputeShader(cas_source.value(), nullptr, "main")); + const ComPtr cs_sharpen(m_shader_cache.GetComputeShader(cas_source.value(), sharpen_only_macros, "main")); + if (!cs_upscale || !cs_sharpen) + return false; + + D3D12::ComputePipelineBuilder cpb; + cpb.SetRootSignature(m_cas_root_signature.get()); + cpb.SetShader(cs_upscale->GetBufferPointer(), cs_upscale->GetBufferSize()); + m_cas_upscale_pipeline = cpb.Create(g_d3d12_context->GetDevice(), m_shader_cache, false); + cpb.SetShader(cs_sharpen->GetBufferPointer(), cs_sharpen->GetBufferSize()); + m_cas_sharpen_pipeline = cpb.Create(g_d3d12_context->GetDevice(), m_shader_cache, false); + if (!m_cas_upscale_pipeline || !m_cas_sharpen_pipeline) + return false; + + m_features.cas_sharpening = true; + return true; +} + +bool GSDevice12::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) +{ + EndRenderPass(); + + GSTexture12* const sTex12 = static_cast(sTex); + GSTexture12* const dTex12 = static_cast(dTex); + D3D12::DescriptorHandle sTexDH, dTexDH; + if (!GetTextureGroupDescriptors(&sTexDH, &sTex12->GetTexture().GetSRVDescriptor(), 1) || + !GetTextureGroupDescriptors(&dTexDH, &dTex12->GetTexture().GetWriteDescriptor(), 1)) + { + ExecuteCommandList(false, "Ran out of descriptors for CAS"); + if (!GetTextureGroupDescriptors(&sTexDH, &sTex12->GetTexture().GetSRVDescriptor(), 1) || + !GetTextureGroupDescriptors(&dTexDH, 
&dTex12->GetTexture().GetWriteDescriptor(), 1)) + { + Console.Error("Failed to allocate CAS descriptors."); + return false; + } + } + + ID3D12GraphicsCommandList* const cmdlist = g_d3d12_context->GetCommandList(); + const D3D12_RESOURCE_STATES old_state = sTex12->GetTexture().GetState(); + sTex12->GetTexture().TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + dTex12->GetTexture().TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + cmdlist->SetComputeRootSignature(m_cas_root_signature.get()); + cmdlist->SetComputeRoot32BitConstants(CAS_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS, NUM_CAS_CONSTANTS, constants.data(), 0); + cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE, sTexDH); + cmdlist->SetComputeRootDescriptorTable(CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE, dTexDH); + cmdlist->SetPipelineState(sharpen_only ? m_cas_sharpen_pipeline.get() : m_cas_upscale_pipeline.get()); + m_dirty_flags |= DIRTY_FLAG_PIPELINE; + + static const int threadGroupWorkRegionDim = 16; + const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmdlist->Dispatch(dispatchX, dispatchY, 1); + + sTex12->GetTexture().TransitionToState(cmdlist, old_state); + return true; +} + void GSDevice12::IASetVertexBuffer(const void* vertex, size_t stride, size_t count) { const u32 size = static_cast(stride) * static_cast(count); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.h b/pcsx2/GS/Renderers/DX12/GSDevice12.h index b255fd3822..78c23ed4ca 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.h +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.h @@ -131,6 +131,10 @@ public: UTILITY_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0, UTILITY_ROOT_SIGNATURE_PARAM_PS_TEXTURES = 1, UTILITY_ROOT_SIGNATURE_PARAM_PS_SAMPLERS = 2, + + CAS_ROOT_SIGNATURE_PARAM_PUSH_CONSTANTS = 0, + CAS_ROOT_SIGNATURE_PARAM_SRC_TEXTURE = 1, + 
CAS_ROOT_SIGNATURE_PARAM_DST_TEXTURE = 2 }; private: @@ -168,6 +172,10 @@ private: std::unordered_map, GSHWDrawConfig::PSSelectorHash> m_tfx_pixel_shaders; std::unordered_map, PipelineSelectorHash> m_tfx_pipelines; + ComPtr m_cas_root_signature; + ComPtr m_cas_upscale_pipeline; + ComPtr m_cas_sharpen_pipeline; + GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache; GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache; @@ -185,6 +193,8 @@ private: void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; + bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) final; + bool GetSampler(D3D12::DescriptorHandle* cpu_handle, GSHWDrawConfig::SamplerSelector ss); void ClearSamplerCache() final; bool GetTextureGroupDescriptors(D3D12::DescriptorHandle* gpu_handle, const D3D12::DescriptorHandle* cpu_handles, u32 count); @@ -208,6 +218,7 @@ private: bool CompileInterlacePipelines(); bool CompileMergePipelines(); bool CompilePostProcessingPipelines(); + bool CompileCASPipelines(); bool CheckStagingBufferSize(u32 required_size); bool MapStagingBuffer(u32 size_to_read); diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp index 7346d9a38f..dd1be966c1 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.cpp @@ -99,6 +99,21 @@ std::unique_ptr GSTexture12::Create(Type type, u32 width, u32 heigh return std::make_unique(type, format, std::move(texture)); } + case Type::RWTexture: + { + pxAssert(levels == 1); + + D3D12::Texture texture; + if (!texture.Create(width, height, levels, d3d_format, srv_format, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12MA::ALLOCATION_FLAG_COMMITTED)) + { + return {}; + } + + D3D12::SetObjectNameFormatted(texture.GetResource(), "%ux%u RW texture", width, height); + return std::make_unique(type, format, std::move(texture)); + 
} + default: return {}; } @@ -381,12 +396,12 @@ void GSTexture12::CommitClear(ID3D12GraphicsCommandList* cmdlist) if (IsDepthStencil()) { m_texture.TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE); - cmdlist->ClearDepthStencilView(m_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_clear_value.depth, 0, 0, nullptr); + cmdlist->ClearDepthStencilView(m_texture.GetWriteDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_clear_value.depth, 0, 0, nullptr); } else { m_texture.TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(m_texture.GetRTVOrDSVDescriptor(), m_clear_value.color, 0, nullptr); + cmdlist->ClearRenderTargetView(m_texture.GetWriteDescriptor(), m_clear_value.color, 0, nullptr); } SetState(GSTexture::State::Dirty); diff --git a/pcsx2/GS/Renderers/DX12/GSTexture12.h b/pcsx2/GS/Renderers/DX12/GSTexture12.h index 09e16d2f6c..544ccb5ca8 100644 --- a/pcsx2/GS/Renderers/DX12/GSTexture12.h +++ b/pcsx2/GS/Renderers/DX12/GSTexture12.h @@ -38,7 +38,7 @@ public: __fi D3D12::Texture& GetTexture() { return m_texture; } __fi const D3D12::DescriptorHandle& GetSRVDescriptor() const { return m_texture.GetSRVDescriptor(); } - __fi const D3D12::DescriptorHandle& GetRTVOrDSVHandle() const { return m_texture.GetRTVOrDSVDescriptor(); } + __fi const D3D12::DescriptorHandle& GetRTVOrDSVHandle() const { return m_texture.GetWriteDescriptor(); } __fi D3D12_RESOURCE_STATES GetResourceState() const { return m_texture.GetState(); } __fi DXGI_FORMAT GetNativeFormat() const { return m_texture.GetFormat(); } __fi ID3D12Resource* GetResource() const { return m_texture.GetResource(); } diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h index 9897c06c9a..729c70893b 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h @@ -350,7 +350,12 @@ public: void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset, int bufIdx) override; 
void DoFXAA(GSTexture* sTex, GSTexture* dTex) override; void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) override; + +#ifndef PCSX2_CORE void DoExternalFX(GSTexture* sTex, GSTexture* dTex) override; +#endif + + bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) override; MRCOwned> LoadShader(NSString* name); MRCOwned> MakePipeline(MTLRenderPipelineDescriptor* desc, id vertex, id fragment, NSString* name); diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index b1d070f2f9..60db4d3976 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -615,11 +615,20 @@ void GSDeviceMTL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float par RenderCopy(sTex, m_shadeboost_pipeline, GSVector4i(0, 0, dTex->GetSize().x, dTex->GetSize().y)); } +#ifndef PCSX2_CORE + void GSDeviceMTL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) { // TODO: Implement } +#endif + +bool GSDeviceMTL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) +{ + return false; +} + MRCOwned> GSDeviceMTL::LoadShader(NSString* name) { NSError* err = nil; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index e5fd443371..8c78b95c11 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -23,6 +23,7 @@ #include "Host.h" #include "HostDisplay.h" #include "ShaderCacheVersion.h" +#include "IconsFontAwesome5.h" #include #include #include @@ -505,6 +506,12 @@ bool GSDeviceOGL::Create() m_shadeboost.ps.SetName("Shadeboost pipe"); } + if (!CreateCASPrograms() && GSConfig.CASMode != GSCASMode::Disabled) + { + Host::AddIconOSDMessage("CASUnsupported", ICON_FA_EXCLAMATION_TRIANGLE, + "CAS is not available, your graphics driver does not support the required functionality.", 10.0f); + } + //
**************************************************************** // rasterization configuration // **************************************************************** @@ -1425,9 +1432,10 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, true); } +#ifndef PCSX2_CORE + void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) { -#ifndef PCSX2_CORE // Lazy compile if (!m_shaderfx.ps.IsValid()) { @@ -1480,9 +1488,10 @@ void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) m_shaderfx.ps.Uniform4f(2, 0.0f, 0.0f, 0.0f, 0.0f); StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, true); -#endif } +#endif + void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) { GL_PUSH("DoShadeBoost"); @@ -1607,6 +1616,67 @@ void GSDeviceOGL::ClearSamplerCache() } } +bool GSDeviceOGL::CreateCASPrograms() +{ + // Image load store and GLSL 420pack is core in GL4.2, no need to check. + m_features.cas_sharpening = GLAD_GL_VERSION_4_2 && GLAD_GL_ARB_compute_shader; + if (!m_features.cas_sharpening) + { + Console.Warning("Compute shaders not supported, CAS is unavailable."); + return false; + } + + std::optional cas_source(Host::ReadResourceFileToString("shaders/opengl/cas.glsl")); + if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value())) + { + m_features.cas_sharpening = false; + return false; + } + + const char* header = + "#version 420\n" + "#extension GL_ARB_compute_shader : require\n"; + const char* sharpen_params[2] = { + "#define CAS_SHARPEN_ONLY false\n", + "#define CAS_SHARPEN_ONLY true\n"}; + + if (!m_shader_cache.GetComputeProgram(&m_cas.upscale_ps, fmt::format("{}{}{}", header, sharpen_params[0], cas_source.value())) || + !m_shader_cache.GetComputeProgram(&m_cas.sharpen_ps, fmt::format("{}{}{}", header, sharpen_params[1], cas_source.value()))) + { + m_features.cas_sharpening = false; + return false; + } + + const auto link_uniforms = [](GL::Program& prog) { + 
prog.RegisterUniform("const0"); + prog.RegisterUniform("const1"); + prog.RegisterUniform("srcOffset"); + }; + link_uniforms(m_cas.upscale_ps); + link_uniforms(m_cas.sharpen_ps); + + return true; +} + +bool GSDeviceOGL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) +{ + const GL::Program& prog = sharpen_only ? m_cas.sharpen_ps : m_cas.upscale_ps; + prog.Bind(); + prog.Uniform4uiv(0, &constants[0]); + prog.Uniform4uiv(1, &constants[4]); + prog.Uniform2iv(2, reinterpret_cast(&constants[8])); + + PSSetShaderResource(0, sTex); + glBindImageTexture(0, static_cast(dTex)->GetID(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); + + static const int threadGroupWorkRegionDim = 16; + const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + glDispatchCompute(dispatchX, dispatchY, 1); + + return true; +} + void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) { GLuint id = 0; diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h index b97f54b989..b3d01f10d8 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h @@ -275,6 +275,12 @@ private: GL::Program ps; } m_shadeboost; + struct + { + GL::Program upscale_ps; + GL::Program sharpen_ps; + } m_cas; + struct { u16 last_query = 0; @@ -301,7 +307,12 @@ private: void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0, int bufIdx = 0) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; +#ifndef PCSX2_CORE void DoExternalFX(GSTexture* sTex, GSTexture* dTex) final; +#endif + + bool CreateCASPrograms(); + bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) final; void OMAttachRt(GSTextureOGL* rt = NULL); void 
OMAttachDs(GSTextureOGL* ds = NULL); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 2337b0d65e..3424d39dd4 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -20,6 +20,7 @@ #include "common/Vulkan/SwapChain.h" #include "common/Vulkan/Util.h" #include "common/Align.h" +#include "common/Path.h" #include "common/ScopedGuard.h" #include "GS.h" #include "GSDeviceVK.h" @@ -116,6 +117,8 @@ bool GSDeviceVK::Create() return false; } + CompileCASPipelines(); + InitializeState(); return true; } @@ -1780,6 +1783,117 @@ bool GSDeviceVK::CompilePostProcessingPipelines() return true; } +bool GSDeviceVK::CompileCASPipelines() +{ + VkDevice dev = g_vulkan_context->GetDevice(); + Vulkan::DescriptorSetLayoutBuilder dslb; + Vulkan::PipelineLayoutBuilder plb; + + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT); + dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT); + if ((m_cas_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_cas_ds_layout, "CAS descriptor layout"); + + plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, NUM_CAS_CONSTANTS * sizeof(u32)); + plb.AddDescriptorSet(m_cas_ds_layout); + if ((m_cas_pipeline_layout = plb.Create(dev)) == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(dev, m_cas_pipeline_layout, "CAS pipeline layout"); + + // we use specialization constants to avoid compiling it twice + std::optional cas_source(Host::ReadResourceFileToString("shaders/vulkan/cas.glsl")); + if (!cas_source.has_value() || !GetCASShaderSource(&cas_source.value())) + return false; + + VkShaderModule mod = g_vulkan_shader_cache->GetComputeShader(cas_source->c_str()); + ScopedGuard mod_guard = [&mod]() { Vulkan::Util::SafeDestroyShaderModule(mod); }; + if (mod == VK_NULL_HANDLE) + return false; + + for (u8 sharpen_only = 0; sharpen_only
< 2; sharpen_only++) + { + Vulkan::ComputePipelineBuilder cpb; + cpb.SetPipelineLayout(m_cas_pipeline_layout); + cpb.SetShader(mod, "main"); + cpb.SetSpecializationBool(0, sharpen_only != 0); + m_cas_pipelines[sharpen_only] = cpb.Create(dev, g_vulkan_shader_cache->GetPipelineCache(true), false); + if (!m_cas_pipelines[sharpen_only]) + return false; + } + + m_features.cas_sharpening = true; + return true; +} + +bool GSDeviceVK::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) +{ + EndRenderPass(); + + VkDescriptorSet ds = g_vulkan_context->AllocateDescriptorSet(m_cas_ds_layout); + if (ds == VK_NULL_HANDLE) + return false; + + GSTextureVK* const sTexVK = static_cast(sTex); + GSTextureVK* const dTexVK = static_cast(dTex); + VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); + + sTexVK->GetTexture().TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // we have to make the barrier explicit here, because there's no free enums for us to use (general already hijacked) + const VkImageMemoryBarrier barrier_to_cs = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + dTexVK->GetLayout(), + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + dTexVK->GetImage(), + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}, + }; + vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &barrier_to_cs); + dTexVK->GetTexture().OverrideImageLayout(VK_IMAGE_LAYOUT_GENERAL); + + // only happening once a frame, so the update isn't a huge deal. + Vulkan::DescriptorSetUpdateBuilder dsub; + dsub.AddImageDescriptorWrite(ds, 0, sTexVK->GetView(), sTexVK->GetLayout()); + dsub.AddStorageImageDescriptorWrite(ds, 1, dTexVK->GetView(), dTexVK->GetLayout()); + dsub.Update(g_vulkan_context->GetDevice(), false); + + // the actual meat and potatoes! 
only four commands. + static const int threadGroupWorkRegionDim = 16; + const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_cas_pipeline_layout, 0, 1, &ds, 0, nullptr); + vkCmdPushConstants(cmdbuf, m_cas_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, NUM_CAS_CONSTANTS * sizeof(u32), constants.data()); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_cas_pipelines[static_cast(sharpen_only)]); + vkCmdDispatch(cmdbuf, dispatchX, dispatchY, 1); + + // and same deal here :( + const VkImageMemoryBarrier barrier_to_fs = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + dTexVK->GetImage(), + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}, + }; + vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &barrier_to_fs); + dTexVK->GetTexture().OverrideImageLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + // all done! 
+ return true; +} + bool GSDeviceVK::CheckStagingBufferSize(u32 required_size) { if (m_readback_staging_buffer_size >= required_size) @@ -1866,6 +1980,11 @@ void GSDeviceVK::DestroyResources() Vulkan::Util::SafeDestroyPipeline(m_fxaa_pipeline); Vulkan::Util::SafeDestroyPipeline(m_shadeboost_pipeline); + for (VkPipeline& it : m_cas_pipelines) + Vulkan::Util::SafeDestroyPipeline(it); + Vulkan::Util::SafeDestroyPipelineLayout(m_cas_pipeline_layout); + Vulkan::Util::SafeDestroyDescriptorSetLayout(m_cas_ds_layout); + for (auto& it : m_samplers) Vulkan::Util::SafeDestroySampler(it.second); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index c54ff27cbd..fbca744888 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -83,6 +83,8 @@ public: INDEX_BUFFER_SIZE = 16 * 1024 * 1024, VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, + + NUM_CAS_PIPELINES = 2, }; enum DATE_RENDER_PASS : u32 { @@ -144,6 +146,10 @@ private: VkRenderPass m_tfx_render_pass[2][2][2][3][2][3][3] = {}; // [rt][ds][hdr][date][fbl][rt_op][ds_op] + VkDescriptorSetLayout m_cas_ds_layout = VK_NULL_HANDLE; + VkPipelineLayout m_cas_pipeline_layout = VK_NULL_HANDLE; + std::array m_cas_pipelines = {}; + GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache; GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache; @@ -159,6 +165,8 @@ private: void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) final; void DoFXAA(GSTexture* sTex, GSTexture* dTex) final; + bool DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array& constants) final; + VkSampler GetSampler(GSHWDrawConfig::SamplerSelector ss); void ClearSamplerCache() final; @@ -182,6 +190,7 @@ private: bool CompileInterlacePipelines(); bool CompileMergePipelines(); bool CompilePostProcessingPipelines(); + bool CompileCASPipelines(); bool CheckStagingBufferSize(u32 required_size); void 
DestroyStagingBuffer(); diff --git a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp index 63b68fb94c..50f11bb93f 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSTextureVK.cpp @@ -126,6 +126,24 @@ std::unique_ptr GSTextureVK::Create(Type type, u32 width, u32 heigh return std::make_unique(type, format, std::move(texture)); } + case Type::RWTexture: + { + pxAssert(levels == 1); + + Vulkan::Texture texture; + if (!texture.Create(width, height, levels, 1, vk_format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT)) + { + return {}; + } + + Vulkan::Util::SetObjectName( + g_vulkan_context->GetDevice(), texture.GetImage(), "%ux%u RW texture", width, height); + return std::make_unique(type, format, std::move(texture)); + } + default: return {}; } diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index bcde1a01ec..a7b38e0537 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -406,6 +406,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(TexturePreloading) && OpEqu(GSDumpCompression) && OpEqu(HWDownloadMode) && + OpEqu(CASMode) && OpEqu(Dithering) && OpEqu(MaxAnisotropy) && OpEqu(SWExtraThreads) && @@ -425,6 +426,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(OverrideTextureBarriers) && OpEqu(OverrideGeometryShaders) && + OpEqu(CAS_Sharpness) && OpEqu(ShadeBoost_Brightness) && OpEqu(ShadeBoost_Contrast) && OpEqu(ShadeBoost_Saturation) && @@ -568,7 +570,9 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingBoolEx(UserHacks_TextureInsideRt, "UserHacks_TextureInsideRt"); GSSettingBoolEx(FXAA, "fxaa"); GSSettingBool(ShadeBoost); +#ifndef PCSX2_CORE GSSettingBoolEx(ShaderFX, "shaderfx"); +#endif GSSettingBoolEx(DumpGSData, "dump"); 
GSSettingBoolEx(SaveRT, "save"); GSSettingBoolEx(SaveFrame, "savef"); @@ -600,6 +604,8 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingIntEnumEx(TexturePreloading, "texture_preloading"); GSSettingIntEnumEx(GSDumpCompression, "GSDumpCompression"); GSSettingIntEnumEx(HWDownloadMode, "HWDownloadMode"); + GSSettingIntEnumEx(CASMode, "CASMode"); + GSSettingIntEx(CAS_Sharpness, "CASSharpness"); GSSettingIntEx(Dithering, "dithering_ps2"); GSSettingIntEx(MaxAnisotropy, "MaxAnisotropy"); GSSettingIntEx(SWExtraThreads, "extrathreads");