GS:MTL: CAS support

This commit is contained in:
TellowKrinkle 2022-11-20 20:15:13 -06:00 committed by refractionpcsx2
parent f7c79fe3cc
commit 2fb8ecbf02
7 changed files with 176 additions and 8 deletions

View File

@ -794,6 +794,7 @@ if(USE_VULKAN)
endif()
set(pcsx2GSMetalShaders
GS/Renderers/Metal/cas.metal
GS/Renderers/Metal/convert.metal
GS/Renderers/Metal/present.metal
GS/Renderers/Metal/merge.metal

View File

@ -237,6 +237,7 @@ public:
MRCOwned<id<MTLFence>> m_spin_fence;
// Functions and Pipeline States
MRCOwned<id<MTLComputePipelineState>> m_cas_pipeline[2];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy[2];
@ -359,6 +360,7 @@ public:
MRCOwned<id<MTLFunction>> LoadShader(NSString* name);
MRCOwned<id<MTLRenderPipelineState>> MakePipeline(MTLRenderPipelineDescriptor* desc, id<MTLFunction> vertex, id<MTLFunction> fragment, NSString* name);
MRCOwned<id<MTLComputePipelineState>> MakeComputePipeline(id<MTLFunction> compute, NSString* name);
bool Create() override;
void ClearRenderTarget(GSTexture* t, const GSVector4& c) override;

View File

@ -503,6 +503,9 @@ GSTexture* GSDeviceMTL::CreateSurface(GSTexture::Type type, int width, int heigh
else
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
break;
case GSTexture::Type::RWTexture:
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
break;
default:
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
}
@ -625,9 +628,24 @@ void GSDeviceMTL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
#endif
bool GSDeviceMTL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
{
return false;
}
{ @autoreleasepool {
static constexpr int threadGroupWorkRegionDim = 16;
const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
static_assert(sizeof(constants) == sizeof(GSMTLCASPSUniform));
EndRenderPass();
id<MTLComputeCommandEncoder> enc = [GetRenderCmdBuf() computeCommandEncoder];
[enc setLabel:@"CAS"];
[enc setComputePipelineState:m_cas_pipeline[sharpen_only]];
[enc setTexture:static_cast<GSTextureMTL*>(sTex)->GetTexture() atIndex:0];
[enc setTexture:static_cast<GSTextureMTL*>(dTex)->GetTexture() atIndex:1];
[enc setBytes:&constants length:sizeof(constants) atIndex:GSMTLBufferIndexUniforms];
[enc dispatchThreadgroups:MTLSizeMake(dispatchX, dispatchY, 1)
threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
[enc endEncoding];
return true;
}}
MRCOwned<id<MTLFunction>> GSDeviceMTL::LoadShader(NSString* name)
{
@ -658,6 +676,26 @@ MRCOwned<id<MTLRenderPipelineState>> GSDeviceMTL::MakePipeline(MTLRenderPipeline
return res;
}
MRCOwned<id<MTLComputePipelineState>> GSDeviceMTL::MakeComputePipeline(id<MTLFunction> compute, NSString* name)
{
MRCOwned<MTLComputePipelineDescriptor*> desc = MRCTransfer([MTLComputePipelineDescriptor new]);
[desc setLabel:name];
[desc setComputeFunction:compute];
NSError* err;
MRCOwned<id<MTLComputePipelineState>> res = MRCTransfer([m_dev.dev
newComputePipelineStateWithDescriptor:desc
options:0
reflection:nil
error:&err]);
if (unlikely(err))
{
NSString* msg = [NSString stringWithFormat:@"Failed to create pipeline %@: %@", name, [err localizedDescription]];
Console.Error("%s", [msg UTF8String]);
throw GSRecoverableError();
}
return res;
}
static void applyAttribute(MTLVertexDescriptor* desc, NSUInteger idx, MTLVertexFormat fmt, NSUInteger offset, NSUInteger buffer_index)
{
MTLVertexAttributeDescriptor* attrs = desc.attributes[idx];
@ -704,6 +742,7 @@ bool GSDeviceMTL::Create()
m_features.framebuffer_fetch = m_dev.features.framebuffer_fetch;
m_features.dual_source_blend = true;
m_features.stencil_buffer = true;
m_features.cas_sharpening = true;
try
{
@ -725,12 +764,13 @@ bool GSDeviceMTL::Create()
[clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0];
[clearSpinBuffer updateFence:m_spin_fence];
[clearSpinBuffer endEncoding];
NSError* err = nullptr;
m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
if (err)
m_spin_pipeline = MakeComputePipeline(LoadShader(@"waste_time"), @"waste_time");
for (int sharpen_only = 0; sharpen_only < 2; sharpen_only++)
{
Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]);
return false;
setFnConstantB(m_fn_constants, sharpen_only, GSMTLConstantIndex_CAS_SHARPEN_ONLY);
NSString* shader = m_dev.features.has_fast_half ? @"CASHalf" : @"CASFloat";
m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
}
m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);

View File

@ -42,6 +42,7 @@ struct GSMTLDevice
bool framebuffer_fetch;
bool primid;
bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD
bool has_fast_half;
MetalVersion shader_version;
int max_texsize;
};

View File

@ -152,6 +152,10 @@ GSMTLDevice::GSMTLDevice(MRCOwned<id<MTLDevice>> dev)
if ([dev supportsFamily:MTLGPUFamilyApple1])
features.framebuffer_fetch = true;
if (@available(macOS 10.15, iOS 13.0, *))
if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1])
features.has_fast_half = true; // Approximate guess
features.shader_version = detectLibraryVersion(shaders);
if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23)
{

View File

@ -57,6 +57,13 @@ struct GSMTLInterlacePSUniform
vector_float4 ZrH;
};
struct GSMTLCASPSUniform
{
vector_uint4 const0;
vector_uint4 const1;
vector_int2 srcOffset;
};
struct GSMTLMainVertex
{
vector_float2 st;
@ -132,6 +139,7 @@ enum class GSMTLExpandType : unsigned char
enum GSMTLFnConstants
{
GSMTLConstantIndex_CAS_SHARPEN_ONLY,
GSMTLConstantIndex_SCALING_FACTOR,
GSMTLConstantIndex_FRAMEBUFFER_FETCH,
GSMTLConstantIndex_FST,

View File

@ -0,0 +1,112 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#define A_GPU 1
#define A_MSL 1
#define A_HALF 1
#include "../../../../bin/resources/shaders/common/ffx_a.h"
struct CASTextureF
{
const thread texture2d<float, access::read>& tex;
uint2 offset;
};
struct CASTextureH
{
const thread texture2d<half, access::read>& tex;
ushort2 offset;
};
#define CAS_TEXTURE CASTextureF
#define CAS_TEXTUREH CASTextureH
A_STATIC AF3 CasLoad(CASTextureF tex, ASU2 coord)
{
return tex.tex.read(AU2(coord) + tex.offset).rgb;
}
#define CasInput(r,g,b)
A_STATIC AH3 CasLoadH(CASTextureH tex, ASW2 coord)
{
return tex.tex.read(AW2(coord) + tex.offset).rgb;
}
A_STATIC void CasInputH(inoutAH2 r, inoutAH2 g, inoutAH2 b){}
#include "../../../../bin/resources/shaders/common/ffx_cas.h"
#include "GSMTLShaderCommon.h"
constant bool CAS_SHARPEN_ONLY [[function_constant(GSMTLConstantIndex_CAS_SHARPEN_ONLY)]];
kernel void CASFloat(
uint2 localID [[thread_position_in_threadgroup]],
uint2 workgroupID [[threadgroup_position_in_grid]],
texture2d<float, access::read> input [[texture(0)]],
texture2d<float, access::write> output [[texture(1)]],
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
const AU4 const0 = cb.const0;
const AU4 const1 = cb.const1;
const CASTextureF tex{input, AU2(cb.srcOffset)};
// Filter.
float r, g, b;
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.x += 8;
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.y += 8;
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
gxy.x -= 8;
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(float4(r, g, b, 1), gxy);
}
kernel void CASHalf(
uint2 localID [[thread_position_in_threadgroup]],
uint2 workgroupID [[threadgroup_position_in_grid]],
texture2d<half, access::read> input [[texture(0)]],
texture2d<half, access::write> output [[texture(1)]],
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
const AU4 const0 = cb.const0;
const AU4 const1 = cb.const1;
const CASTextureH tex{input, AW2(cb.srcOffset)};
// Filter.
half2 r, g, b;
#pragma unroll
for (int i = 0; i < 2; i++)
{
CasFilterH(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
output.write(half4(r.x, g.x, b.x, 1), gxy);
output.write(half4(r.y, g.y, b.y, 1), gxy + AU2(8, 0));
gxy.y += 8;
}
}