Metal renderer: add a GPU path for updating the CLUT texture.

Summary of what this patch adds:
  * GSDeviceMTL.h       - the m_clut_pipeline[2] member and the UpdateCLUTTexture() override.
  * GSDeviceMTL.mm      - a ToSimd() helper (GSVector2 to simd::float2); creation of the two
                          pipelines ([0] = ps_convert_clut_4, [1] = ps_convert_clut_8);
                          CLUT_4 / CLUT_8 added to the ShaderConvert cases that `continue`
                          in Create(); and UpdateCLUTTexture() itself.
  * GSMTLSharedHeader.h - the GSMTLCLUTConvertPSUniform struct { scale, offset, doffset }.
  * convert.metal       - the ps_convert_clut_4 / ps_convert_clut_8 fragment shaders.

UpdateCLUTTexture() fills the uniform from sTex->GetScale(), (offsetX, offsetY) and dOffset,
begins a "CLUT Update" render pass on dTex with MTLLoadActionDontCare, uploads the uniform as
fragment bytes and passes the rect (0, 0, dSize, 1) to RenderCopy. It selects the 4-bit
pipeline when dSize == 16 and the 8-bit pipeline for every other size.

Both shaders turn the fragment's x coordinate plus doffset into an index, map that index to a
2D position, add the source offset, multiply by the source scale and read that texel.

NOTE(review): ps_convert_clut_8 clamps its index to 255 but ps_convert_clut_4 does not clamp
at all - confirm callers never pass a dOffset that pushes the 4-bit index past 15.
NOTE(review): the "two rows of 8x8" comment in ps_convert_clut_4 does not match the code,
which places 8 entries per row (index % 8, index / 8) - presumably "two rows of 8" was meant.

diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
index 434ebd19ac..ed97851f4c 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
@@ -242,6 +242,7 @@ public:
 	MRCOwned<id<MTLRenderPipelineState>> m_merge_pipeline[4];
 	MRCOwned<id<MTLRenderPipelineState>> m_interlace_pipeline[NUM_INTERLACE_SHADERS];
 	MRCOwned<id<MTLRenderPipelineState>> m_datm_pipeline[2];
+	MRCOwned<id<MTLRenderPipelineState>> m_clut_pipeline[2];
 	MRCOwned<id<MTLRenderPipelineState>> m_stencil_clear_pipeline;
 	MRCOwned<id<MTLRenderPipelineState>> m_primid_init_pipeline[2][2];
 	MRCOwned<id<MTLRenderPipelineState>> m_hdr_init_pipeline;
@@ -371,6 +372,7 @@ public:
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
 	void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
+	void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
 
 	void FlushClears(GSTexture* tex);
 
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
index d3db177756..3a717d219d 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@@ -26,6 +26,11 @@
 #ifdef __APPLE__
 #include "GSMTLSharedHeader.h"
 
+static constexpr simd::float2 ToSimd(const GSVector2& vec)
+{
+	return simd::make_float2(vec.x, vec.y);
+}
+
 static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
 {
 	switch (status)
@@ -918,6 +923,8 @@ bool GSDeviceMTL::Create()
 	m_hdr_resolve_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_resolve"), @"HDR Resolve");
 	m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa");
 	m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost");
+	m_clut_pipeline[0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_4"), @"4-bit CLUT Update");
+	m_clut_pipeline[1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_8"), @"8-bit CLUT Update");
 	pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::HDRColor);
 	m_hdr_init_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_init"), @"HDR Init");
 	pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid;
@@ -954,6 +961,8 @@ bool GSDeviceMTL::Create()
 			case ShaderConvert::Count:
 			case ShaderConvert::DATM_0:
 			case ShaderConvert::DATM_1:
+			case ShaderConvert::CLUT_4:
+			case ShaderConvert::CLUT_8:
 			case ShaderConvert::HDR_INIT:
 			case ShaderConvert::HDR_RESOLVE:
 				continue;
@@ -1298,6 +1307,18 @@ void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
 	}
 }}
 
+void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
+{
+	GSMTLCLUTConvertPSUniform uniform = { ToSimd(sTex->GetScale()), {offsetX, offsetY}, dOffset };
+
+	const bool is_clut4 = dSize == 16;
+	const GSVector4i dRect(0, 0, dSize, 1);
+
+	BeginRenderPass(@"CLUT Update", dTex, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare);
+	[m_current_render.encoder setFragmentBytes:&uniform length:sizeof(uniform) atIndex:GSMTLBufferIndexUniforms];
+	RenderCopy(sTex, m_clut_pipeline[!is_clut4], dRect);
+}
+
 void GSDeviceMTL::FlushClears(GSTexture* tex)
 {
 	if (tex)
diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
index 05634115fb..6d22d1b2d1 100644
--- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
+++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
@@ -64,6 +64,13 @@ struct GSMTLCASPSUniform
 	vector_int2 srcOffset;
 };
 
+struct GSMTLCLUTConvertPSUniform
+{
+	vector_float2 scale;
+	vector_uint2 offset;
+	uint doffset;
+};
+
 struct GSMTLMainVertex
 {
 	vector_float2 st;
diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal
index 3402a2bf15..8d3cc3193b 100644
--- a/pcsx2/GS/Renderers/Metal/convert.metal
+++ b/pcsx2/GS/Renderers/Metal/convert.metal
@@ -282,6 +282,35 @@ fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], DirectRe
 	return float4(sel1);
 }
 
+fragment float4 ps_convert_clut_4(ConvertShaderData data [[stage_in]],
+	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
+	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	// CLUT4 is easy, just two rows of 8x8.
+	uint index = uint(data.p.x) + uniform.doffset;
+	uint2 pos = uint2(index % 8, index / 8);
+
+	uint2 final = uint2(float2(uniform.offset + pos) * uniform.scale);
+	return texture.read(final);
+}
+
+fragment float4 ps_convert_clut_8(ConvertShaderData data [[stage_in]],
+	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
+	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	uint index = min(uint(data.p.x) + uniform.doffset, 255u);
+
+	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
+	// This can probably be done better..
+	uint subgroup = (index / 8) % 4;
+	uint2 pos;
+	pos.x = (index % 8) + ((subgroup >= 2) ? 8 :0u);
+	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);
+
+	uint2 final = uint2(float2(uniform.offset + pos) * uniform.scale);
+	return texture.read(final);
+}
+
 fragment float4 ps_yuv(ConvertShaderData data [[stage_in]], ConvertPSRes res,
 	constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
 {