mirror of https://github.com/PCSX2/pcsx2.git
GS:MTL: CAS support
This commit is contained in:
parent
f7c79fe3cc
commit
2fb8ecbf02
|
@ -794,6 +794,7 @@ if(USE_VULKAN)
|
|||
endif()
|
||||
|
||||
set(pcsx2GSMetalShaders
|
||||
GS/Renderers/Metal/cas.metal
|
||||
GS/Renderers/Metal/convert.metal
|
||||
GS/Renderers/Metal/present.metal
|
||||
GS/Renderers/Metal/merge.metal
|
||||
|
|
|
@ -237,6 +237,7 @@ public:
|
|||
MRCOwned<id<MTLFence>> m_spin_fence;
|
||||
|
||||
// Functions and Pipeline States
|
||||
MRCOwned<id<MTLComputePipelineState>> m_cas_pipeline[2];
|
||||
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
|
||||
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(PresentShader::Count)];
|
||||
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy[2];
|
||||
|
@ -359,6 +360,7 @@ public:
|
|||
|
||||
MRCOwned<id<MTLFunction>> LoadShader(NSString* name);
|
||||
MRCOwned<id<MTLRenderPipelineState>> MakePipeline(MTLRenderPipelineDescriptor* desc, id<MTLFunction> vertex, id<MTLFunction> fragment, NSString* name);
|
||||
MRCOwned<id<MTLComputePipelineState>> MakeComputePipeline(id<MTLFunction> compute, NSString* name);
|
||||
bool Create() override;
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c) override;
|
||||
|
|
|
@ -503,6 +503,9 @@ GSTexture* GSDeviceMTL::CreateSurface(GSTexture::Type type, int width, int heigh
|
|||
else
|
||||
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
|
||||
break;
|
||||
case GSTexture::Type::RWTexture:
|
||||
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite];
|
||||
break;
|
||||
default:
|
||||
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget];
|
||||
}
|
||||
|
@ -625,9 +628,24 @@ void GSDeviceMTL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
|
|||
#endif
|
||||
|
||||
bool GSDeviceMTL::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, const std::array<u32, NUM_CAS_CONSTANTS>& constants)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
{ @autoreleasepool {
|
||||
static constexpr int threadGroupWorkRegionDim = 16;
|
||||
const int dispatchX = (dTex->GetWidth() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
|
||||
const int dispatchY = (dTex->GetHeight() + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
|
||||
static_assert(sizeof(constants) == sizeof(GSMTLCASPSUniform));
|
||||
|
||||
EndRenderPass();
|
||||
id<MTLComputeCommandEncoder> enc = [GetRenderCmdBuf() computeCommandEncoder];
|
||||
[enc setLabel:@"CAS"];
|
||||
[enc setComputePipelineState:m_cas_pipeline[sharpen_only]];
|
||||
[enc setTexture:static_cast<GSTextureMTL*>(sTex)->GetTexture() atIndex:0];
|
||||
[enc setTexture:static_cast<GSTextureMTL*>(dTex)->GetTexture() atIndex:1];
|
||||
[enc setBytes:&constants length:sizeof(constants) atIndex:GSMTLBufferIndexUniforms];
|
||||
[enc dispatchThreadgroups:MTLSizeMake(dispatchX, dispatchY, 1)
|
||||
threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
|
||||
[enc endEncoding];
|
||||
return true;
|
||||
}}
|
||||
|
||||
MRCOwned<id<MTLFunction>> GSDeviceMTL::LoadShader(NSString* name)
|
||||
{
|
||||
|
@ -658,6 +676,26 @@ MRCOwned<id<MTLRenderPipelineState>> GSDeviceMTL::MakePipeline(MTLRenderPipeline
|
|||
return res;
|
||||
}
|
||||
|
||||
MRCOwned<id<MTLComputePipelineState>> GSDeviceMTL::MakeComputePipeline(id<MTLFunction> compute, NSString* name)
|
||||
{
|
||||
MRCOwned<MTLComputePipelineDescriptor*> desc = MRCTransfer([MTLComputePipelineDescriptor new]);
|
||||
[desc setLabel:name];
|
||||
[desc setComputeFunction:compute];
|
||||
NSError* err;
|
||||
MRCOwned<id<MTLComputePipelineState>> res = MRCTransfer([m_dev.dev
|
||||
newComputePipelineStateWithDescriptor:desc
|
||||
options:0
|
||||
reflection:nil
|
||||
error:&err]);
|
||||
if (unlikely(err))
|
||||
{
|
||||
NSString* msg = [NSString stringWithFormat:@"Failed to create pipeline %@: %@", name, [err localizedDescription]];
|
||||
Console.Error("%s", [msg UTF8String]);
|
||||
throw GSRecoverableError();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static void applyAttribute(MTLVertexDescriptor* desc, NSUInteger idx, MTLVertexFormat fmt, NSUInteger offset, NSUInteger buffer_index)
|
||||
{
|
||||
MTLVertexAttributeDescriptor* attrs = desc.attributes[idx];
|
||||
|
@ -704,6 +742,7 @@ bool GSDeviceMTL::Create()
|
|||
m_features.framebuffer_fetch = m_dev.features.framebuffer_fetch;
|
||||
m_features.dual_source_blend = true;
|
||||
m_features.stencil_buffer = true;
|
||||
m_features.cas_sharpening = true;
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -725,12 +764,13 @@ bool GSDeviceMTL::Create()
|
|||
[clearSpinBuffer fillBuffer:m_spin_buffer range:NSMakeRange(0, 4) value:0];
|
||||
[clearSpinBuffer updateFence:m_spin_fence];
|
||||
[clearSpinBuffer endEncoding];
|
||||
NSError* err = nullptr;
|
||||
m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
|
||||
if (err)
|
||||
m_spin_pipeline = MakeComputePipeline(LoadShader(@"waste_time"), @"waste_time");
|
||||
|
||||
for (int sharpen_only = 0; sharpen_only < 2; sharpen_only++)
|
||||
{
|
||||
Console.Error("Failed to create spin pipeline: %s", [[err localizedDescription] UTF8String]);
|
||||
return false;
|
||||
setFnConstantB(m_fn_constants, sharpen_only, GSMTLConstantIndex_CAS_SHARPEN_ONLY);
|
||||
NSString* shader = m_dev.features.has_fast_half ? @"CASHalf" : @"CASFloat";
|
||||
m_cas_pipeline[sharpen_only] = MakeComputePipeline(LoadShader(shader), sharpen_only ? @"CAS Sharpen" : @"CAS Upscale");
|
||||
}
|
||||
|
||||
m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]);
|
||||
|
|
|
@ -42,6 +42,7 @@ struct GSMTLDevice
|
|||
bool framebuffer_fetch;
|
||||
bool primid;
|
||||
bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD
|
||||
bool has_fast_half;
|
||||
MetalVersion shader_version;
|
||||
int max_texsize;
|
||||
};
|
||||
|
|
|
@ -152,6 +152,10 @@ GSMTLDevice::GSMTLDevice(MRCOwned<id<MTLDevice>> dev)
|
|||
if ([dev supportsFamily:MTLGPUFamilyApple1])
|
||||
features.framebuffer_fetch = true;
|
||||
|
||||
if (@available(macOS 10.15, iOS 13.0, *))
|
||||
if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1])
|
||||
features.has_fast_half = true; // Approximate guess
|
||||
|
||||
features.shader_version = detectLibraryVersion(shaders);
|
||||
if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23)
|
||||
{
|
||||
|
|
|
@ -57,6 +57,13 @@ struct GSMTLInterlacePSUniform
|
|||
vector_float4 ZrH;
|
||||
};
|
||||
|
||||
struct GSMTLCASPSUniform
|
||||
{
|
||||
vector_uint4 const0;
|
||||
vector_uint4 const1;
|
||||
vector_int2 srcOffset;
|
||||
};
|
||||
|
||||
struct GSMTLMainVertex
|
||||
{
|
||||
vector_float2 st;
|
||||
|
@ -132,6 +139,7 @@ enum class GSMTLExpandType : unsigned char
|
|||
|
||||
enum GSMTLFnConstants
|
||||
{
|
||||
GSMTLConstantIndex_CAS_SHARPEN_ONLY,
|
||||
GSMTLConstantIndex_SCALING_FACTOR,
|
||||
GSMTLConstantIndex_FRAMEBUFFER_FETCH,
|
||||
GSMTLConstantIndex_FST,
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define A_GPU 1
|
||||
#define A_MSL 1
|
||||
#define A_HALF 1
|
||||
|
||||
#include "../../../../bin/resources/shaders/common/ffx_a.h"
|
||||
|
||||
struct CASTextureF
|
||||
{
|
||||
const thread texture2d<float, access::read>& tex;
|
||||
uint2 offset;
|
||||
};
|
||||
|
||||
struct CASTextureH
|
||||
{
|
||||
const thread texture2d<half, access::read>& tex;
|
||||
ushort2 offset;
|
||||
};
|
||||
|
||||
#define CAS_TEXTURE CASTextureF
|
||||
#define CAS_TEXTUREH CASTextureH
|
||||
|
||||
A_STATIC AF3 CasLoad(CASTextureF tex, ASU2 coord)
|
||||
{
|
||||
return tex.tex.read(AU2(coord) + tex.offset).rgb;
|
||||
}
|
||||
#define CasInput(r,g,b)
|
||||
|
||||
A_STATIC AH3 CasLoadH(CASTextureH tex, ASW2 coord)
|
||||
{
|
||||
return tex.tex.read(AW2(coord) + tex.offset).rgb;
|
||||
}
|
||||
|
||||
A_STATIC void CasInputH(inoutAH2 r, inoutAH2 g, inoutAH2 b){}
|
||||
|
||||
#include "../../../../bin/resources/shaders/common/ffx_cas.h"
|
||||
|
||||
#include "GSMTLShaderCommon.h"
|
||||
|
||||
constant bool CAS_SHARPEN_ONLY [[function_constant(GSMTLConstantIndex_CAS_SHARPEN_ONLY)]];
|
||||
|
||||
kernel void CASFloat(
|
||||
uint2 localID [[thread_position_in_threadgroup]],
|
||||
uint2 workgroupID [[threadgroup_position_in_grid]],
|
||||
texture2d<float, access::read> input [[texture(0)]],
|
||||
texture2d<float, access::write> output [[texture(1)]],
|
||||
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
|
||||
{
|
||||
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
|
||||
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
|
||||
const AU4 const0 = cb.const0;
|
||||
const AU4 const1 = cb.const1;
|
||||
const CASTextureF tex{input, AU2(cb.srcOffset)};
|
||||
|
||||
// Filter.
|
||||
float r, g, b;
|
||||
|
||||
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
|
||||
output.write(float4(r, g, b, 1), gxy);
|
||||
gxy.x += 8;
|
||||
|
||||
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
|
||||
output.write(float4(r, g, b, 1), gxy);
|
||||
gxy.y += 8;
|
||||
|
||||
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
|
||||
output.write(float4(r, g, b, 1), gxy);
|
||||
gxy.x -= 8;
|
||||
|
||||
CasFilter(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
|
||||
output.write(float4(r, g, b, 1), gxy);
|
||||
}
|
||||
|
||||
kernel void CASHalf(
|
||||
uint2 localID [[thread_position_in_threadgroup]],
|
||||
uint2 workgroupID [[threadgroup_position_in_grid]],
|
||||
texture2d<half, access::read> input [[texture(0)]],
|
||||
texture2d<half, access::write> output [[texture(1)]],
|
||||
constant GSMTLCASPSUniform& cb [[buffer(GSMTLBufferIndexUniforms)]])
|
||||
{
|
||||
// Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
|
||||
AU2 gxy = ARmp8x8(localID.x) + (workgroupID << 4);
|
||||
const AU4 const0 = cb.const0;
|
||||
const AU4 const1 = cb.const1;
|
||||
const CASTextureH tex{input, AW2(cb.srcOffset)};
|
||||
|
||||
// Filter.
|
||||
half2 r, g, b;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
CasFilterH(tex, r, g, b, gxy, const0, const1, CAS_SHARPEN_ONLY);
|
||||
output.write(half4(r.x, g.x, b.x, 1), gxy);
|
||||
output.write(half4(r.y, g.y, b.y, 1), gxy + AU2(8, 0));
|
||||
gxy.y += 8;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue