GS:MTL: Implement clut shaders

2023-01-12 19:43:55 +01:00 · 2023-01-12 19:43:55 +01:00 · a2e3522862
parent e20c2210f5
commit a2e3522862
4 changed files with 59 additions and 0 deletions
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
@ -242,6 +242,7 @@ public:
 	MRCOwned<id<MTLRenderPipelineState>> m_merge_pipeline[4];
 	MRCOwned<id<MTLRenderPipelineState>> m_interlace_pipeline[NUM_INTERLACE_SHADERS];
 	MRCOwned<id<MTLRenderPipelineState>> m_datm_pipeline[2];
+	MRCOwned<id<MTLRenderPipelineState>> m_clut_pipeline[2]; // [0] = 4-bit CLUT update (ps_convert_clut_4), [1] = 8-bit CLUT update (ps_convert_clut_8)
 	MRCOwned<id<MTLRenderPipelineState>> m_stencil_clear_pipeline;
 	MRCOwned<id<MTLRenderPipelineState>> m_primid_init_pipeline[2][2];
 	MRCOwned<id<MTLRenderPipelineState>> m_hdr_init_pipeline;
@ -371,6 +372,7 @@ public:
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
 	void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
+	void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override; // Draws a dSize x 1 strip into dTex with the CLUT pipeline (4-bit when dSize == 16, otherwise 8-bit)

 	void FlushClears(GSTexture* tex);

--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@ -26,6 +26,11 @@
 #ifdef __APPLE__
 #include "GSMTLSharedHeader.h"

+static constexpr simd::float2 ToSimd(const GSVector2& vec) // Builds a simd::float2 from vec.x and vec.y
+{
+	return simd::make_float2(vec.x, vec.y);
+}
+
 static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
 {
 	switch (status)
@ -918,6 +923,8 @@ bool GSDeviceMTL::Create()
 		m_hdr_resolve_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_resolve"), @"HDR Resolve");
 		m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa");
 		m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost");
+		m_clut_pipeline[0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_4"), @"4-bit CLUT Update");
+		m_clut_pipeline[1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_convert_clut_8"), @"8-bit CLUT Update");
 		pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::HDRColor);
 		m_hdr_init_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_hdr_init"), @"HDR Init");
 		pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid;
@ -954,6 +961,8 @@ bool GSDeviceMTL::Create()
 				case ShaderConvert::Count:
 				case ShaderConvert::DATM_0:
 				case ShaderConvert::DATM_1:
+				case ShaderConvert::CLUT_4:
+				case ShaderConvert::CLUT_8:
 				case ShaderConvert::HDR_INIT:
 				case ShaderConvert::HDR_RESOLVE:
 					continue;
@ -1298,6 +1307,18 @@ void GSDeviceMTL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
 	}
 }}

+/// Encodes a render pass that draws a dSize x 1 rectangle into dTex with one of the CLUT pipelines.
+///
+/// @param sTex    Texture handed to RenderCopy as the source; its scale is forwarded to the shader.
+/// @param offsetX Forwarded to the fragment shader as the x component of the uniform `offset`.
+/// @param offsetY Forwarded to the fragment shader as the y component of the uniform `offset`.
+/// @param dTex    Render target of the pass (previous contents are not loaded).
+/// @param dOffset Forwarded to the fragment shader as the uniform `doffset`.
+/// @param dSize   Width of the destination rectangle; 16 selects the 4-bit pipeline, anything else the 8-bit one.
+void GSDeviceMTL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
+{ @autoreleasepool {
+	// The pool keeps any autoreleased Objective-C temporaries created while encoding scoped to this call.
+	// NOTE(review): intended to mirror the `}}`-terminated methods next to this one - confirm they use the same wrapper.
+	GSMTLCLUTConvertPSUniform uniform = { ToSimd(sTex->GetScale()), {offsetX, offsetY}, dOffset };
+
+	const bool is_clut4 = dSize == 16;
+	const GSVector4i dRect(0, 0, dSize, 1);
+
+	// Neither attachment's previous contents are needed, hence DontCare for both load actions.
+	BeginRenderPass(@"CLUT Update", dTex, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare);
+	[m_current_render.encoder setFragmentBytes:&uniform length:sizeof(uniform) atIndex:GSMTLBufferIndexUniforms];
+	// m_clut_pipeline[0] is the 4-bit pipeline and [1] the 8-bit one, so !is_clut4 is the index.
+	RenderCopy(sTex, m_clut_pipeline[!is_clut4], dRect);
+}}
+
 void GSDeviceMTL::FlushClears(GSTexture* tex)
 {
 	if (tex)
--- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
+++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h
@ -64,6 +64,13 @@ struct GSMTLCASPSUniform
 	vector_int2 srcOffset;
 };

+struct GSMTLCLUTConvertPSUniform // Fragment uniforms read by ps_convert_clut_4 and ps_convert_clut_8
+{
+	vector_float2 scale; // Multiplies the source position (offset + entry position) before the texture read
+	vector_uint2 offset; // Added to the computed entry position within the source texture
+	uint doffset; // Added to the fragment's x position to form the CLUT entry index
+};
+
 struct GSMTLMainVertex
 {
 	vector_float2 st;
--- a/pcsx2/GS/Renderers/Metal/convert.metal
+++ b/pcsx2/GS/Renderers/Metal/convert.metal
@ -282,6 +282,35 @@ fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], DirectRe
 	return float4(sel1);
 }

+// Fetches one 4-bit-mode CLUT entry per destination pixel.
+// The entry index is the fragment's x position plus uniform.doffset.  Entries are laid out
+// eight per row (two rows of eight for a 16-entry palette), starting at uniform.offset in
+// the source texture; the resulting position is multiplied by uniform.scale before reading.
+fragment float4 ps_convert_clut_4(ConvertShaderData data [[stage_in]],
+	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
+	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	const uint entry = uint(data.p.x) + uniform.doffset;
+
+	// Column comes from the low part of the index, row from the high part.
+	const uint2 cell = uint2(entry % 8, entry / 8);
+
+	const float2 scaled = float2(uniform.offset + cell) * uniform.scale;
+	return texture.read(uint2(scaled));
+}
+
+// Fetches one 8-bit-mode CLUT entry per destination pixel.
+// The entry index is the fragment's x position plus uniform.doffset, clamped to 255.
+// The CLUT is stored as 8 groups of 16x2 entries in which the top-right and bottom-left
+// 8-entry runs are swapped relative to linear order, so each group of 32 indices maps to:
+//   0-7 -> left half, row 0     8-15 -> left half, row 1
+//  16-23 -> right half, row 0   24-31 -> right half, row 1
+fragment float4 ps_convert_clut_8(ConvertShaderData data [[stage_in]],
+	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
+	constant GSMTLCLUTConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
+{
+	const uint entry = min(uint(data.p.x) + uniform.doffset, 255u);
+
+	// Which run of eight entries (0..3) inside the current group of 32.
+	const uint run = (entry / 8u) % 4u;
+	const uint column_base = (run >= 2u) ? 8u : 0u;
+	const uint row_in_group = run % 2u;
+
+	const uint2 cell = uint2((entry % 8u) + column_base, ((entry / 32u) * 2u) + row_in_group);
+
+	const float2 scaled = float2(uniform.offset + cell) * uniform.scale;
+	return texture.read(uint2(scaled));
+}
+
 fragment float4 ps_yuv(ConvertShaderData data [[stage_in]], ConvertPSRes res,
 	constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
 {