GS:MTL: Implement clut shaders

This commit is contained in:
TellowKrinkle 2023-01-12 19:43:55 +01:00 committed by TellowKrinkle
parent e20c2210f5
commit a2e3522862
4 changed files with 59 additions and 0 deletions

View File

@ -242,6 +242,7 @@ public:
MRCOwned<id<MTLRenderPipelineState>> m_merge_pipeline[4];
MRCOwned<id<MTLRenderPipelineState>> m_interlace_pipeline[NUM_INTERLACE_SHADERS];
MRCOwned<id<MTLRenderPipelineState>> m_datm_pipeline[2];
// CLUT update pipelines: [0] is built from ps_convert_clut_4 (4-bit CLUT), [1] from ps_convert_clut_8 (8-bit CLUT).
MRCOwned<id<MTLRenderPipelineState>> m_clut_pipeline[2];
MRCOwned<id<MTLRenderPipelineState>> m_stencil_clear_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_primid_init_pipeline[2][2];
MRCOwned<id<MTLRenderPipelineState>> m_hdr_init_pipeline;
@ -371,6 +372,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
/// Draws CLUT (palette) entries read from sTex into a dSize x 1 rectangle at the origin of dTex.
/// @param sTex    Texture the CLUT shaders read from; its scale is forwarded to the shader.
/// @param offsetX Horizontal offset added to the computed source position before scaling.
/// @param offsetY Vertical offset added to the computed source position before scaling.
/// @param dTex    Render target of the CLUT update pass.
/// @param dOffset Value added to each fragment's x coordinate to form the palette index that is fetched.
/// @param dSize   Width of the destination rectangle; 16 selects the 4-bit CLUT shader, any other value the 8-bit one.
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void FlushClears(GSTexture* tex);

View File

@ -26,6 +26,11 @@
#ifdef __APPLE__
#include "GSMTLSharedHeader.h"
/// Repackages the x and y members of a GSVector2 as a two-component simd float vector.
static constexpr simd::float2 ToSimd(const GSVector2& vec)
{
	const auto x = vec.x;
	const auto y = vec.y;
	return simd::make_float2(x, y);
}
static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
{
switch (status)
@ -918,6 +923,8 @@ bool GSDeviceMTL::Create()
m_hdr_resolve_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_resolve"), @"HDR Resolve");
m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa");
m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost");
m_clut_pipeline[0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_4"), @"4-bit CLUT Update");
m_clut_pipeline[1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_8"), @"8-bit CLUT Update");
pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::HDRColor);
m_hdr_init_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_init"), @"HDR Init");
pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid;
@ -954,6 +961,8 @@ bool GSDeviceMTL::Create()
case ShaderConvert::Count:
case ShaderConvert::DATM_0:
case ShaderConvert::DATM_1:
case ShaderConvert::CLUT_4:
case ShaderConvert::CLUT_8:
case ShaderConvert::HDR_INIT:
case ShaderConvert::HDR_RESOLVE:
continue;
@ -1298,6 +1307,18 @@ void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
}
}}
/// Draws CLUT (palette) entries read from sTex into a dSize x 1 rectangle at the origin of dTex.
///
/// @param sTex    Texture sampled by the CLUT shader; its scale is passed along as a uniform.
/// @param offsetX Horizontal offset added to the computed source position before scaling.
/// @param offsetY Vertical offset added to the computed source position before scaling.
/// @param dTex    Render target of the "CLUT Update" pass (its previous contents are not loaded).
/// @param dOffset Value the shader adds to each fragment's x coordinate to form the palette index.
/// @param dSize   Width of the destination rectangle; 16 selects the 4-bit shader, anything else the 8-bit one.
void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{ @autoreleasepool {
	// The pool scopes any autoreleased objects created while starting the pass and encoding to this call.

	// Field order must match GSMTLCLUTConvertPSUniform: scale, offset, doffset.
	GSMTLCLUTConvertPSUniform uniform = { ToSimd(sTex->GetScale()), {offsetX, offsetY}, dOffset };
	// Pipeline 0 is the 4-bit CLUT shader and pipeline 1 the 8-bit one, hence the negated index below.
	const bool is_clut4 = dSize == 16;
	// Destination is a single row, dSize pixels wide, starting at the origin.
	const GSVector4i dRect(0, 0, dSize, 1);
	BeginRenderPass(@"CLUT Update", dTex, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare);
	[m_current_render.encoder setFragmentBytes:&uniform length:sizeof(uniform) atIndex:GSMTLBufferIndexUniforms];
	RenderCopy(sTex, m_clut_pipeline[!is_clut4], dRect);
}}
void GSDeviceMTL::FlushClears(GSTexture* tex)
{
if (tex)

View File

@ -64,6 +64,13 @@ struct GSMTLCASPSUniform
vector_int2 srcOffset;
};
// Uniform block consumed by ps_convert_clut_4 / ps_convert_clut_8.
// The host fills it with an aggregate initializer, so the field order here is load-bearing.
struct GSMTLCLUTConvertPSUniform
{
// Multiplier applied to the (offset + position) source coordinate; filled from the source texture's scale.
vector_float2 scale;
// Offset added to the computed position inside the source texture, before scaling.
vector_uint2 offset;
// Added to the fragment's x coordinate to obtain the palette index to fetch.
uint doffset;
};
struct GSMTLMainVertex
{
vector_float2 st;

View File

@ -282,6 +282,35 @@ fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], DirectRe
return float4(sel1);
}
/// Fetches one palette entry for a 4-bit CLUT update.
/// The palette index is the fragment's x coordinate (data.p.x) plus uniform.doffset;
/// the entry is read from the source texture at (uniform.offset + position) * uniform.scale.
fragment float4 ps_convert_clut_4(ConvertShaderData data [[stage_in]],
	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	// Entries are laid out in rows of 8: column = entry % 8, row = entry / 8.
	const uint entry = uint(data.p.x) + uniform.doffset;
	const uint2 cell = uint2(entry % 8, entry / 8);

	// Apply the source offset, scale in floating point, then truncate back to integer texel coordinates.
	const float2 scaled = float2(uniform.offset + cell) * uniform.scale;
	const uint2 coord = uint2(scaled);
	return texture.read(coord);
}
/// Fetches one palette entry for an 8-bit CLUT update.
/// The palette index is the fragment's x coordinate (data.p.x) plus uniform.doffset, clamped to 255;
/// the entry is read from the source texture at (uniform.offset + position) * uniform.scale.
fragment float4 ps_convert_clut_8(ConvertShaderData data [[stage_in]],
	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const uint entry = min(uint(data.p.x) + uniform.doffset, 255u);

	// Every 32 entries form a 16x2 group made of four runs of 8 entries.
	// Runs 0 and 1 occupy the left half (top, bottom); runs 2 and 3 occupy the right half (top, bottom),
	// i.e. the top-right and bottom-left quadrants are swapped relative to plain row-major order.
	const uint run = (entry / 8) % 4;
	const uint column = (entry % 8) + (run / 2u) * 8u; // runs 2 and 3 shift right by 8
	const uint row = ((entry / 32u) * 2u) + (run % 2u); // odd runs land on the group's second row
	const uint2 cell = uint2(column, row);

	// Apply the source offset, scale in floating point, then truncate back to integer texel coordinates.
	const float2 scaled = float2(uniform.offset + cell) * uniform.scale;
	const uint2 coord = uint2(scaled);
	return texture.read(coord);
}
fragment float4 ps_yuv(ConvertShaderData data [[stage_in]], ConvertPSRes res,
constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{