From 5ecaa9459ddcdf75688af65ee991b9dd3859aff6 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 16 Nov 2021 05:17:30 -0600 Subject: [PATCH] GS: Add Metal renderer --- bin/resources/shaders/common/fxaa.fx | 33 +- cmake/BuildParameters.cmake | 9 +- common/CMakeLists.txt | 5 +- common/MRCHelpers.h | 97 + pcsx2/CMakeLists.txt | 72 + pcsx2/Frontend/MetalHostDisplay.h | 84 + pcsx2/Frontend/MetalHostDisplay.mm | 410 ++++ pcsx2/GS/GSRegs.h | 3 +- pcsx2/GS/Renderers/Common/GSDevice.h | 3 + pcsx2/GS/Renderers/Metal/GSDeviceMTL.h | 398 ++++ pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 1795 +++++++++++++++++ pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h | 66 + pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm | 214 ++ pcsx2/GS/Renderers/Metal/GSMTLShaderCommon.h | 60 + pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h | 151 ++ .../GS/Renderers/Metal/GSMetalCPPAccessible.h | 28 + pcsx2/GS/Renderers/Metal/GSTextureMTL.h | 82 + pcsx2/GS/Renderers/Metal/GSTextureMTL.mm | 216 ++ pcsx2/GS/Renderers/Metal/convert.metal | 378 ++++ pcsx2/GS/Renderers/Metal/fxaa.metal | 10 + pcsx2/GS/Renderers/Metal/interlace.metal | 49 + pcsx2/GS/Renderers/Metal/merge.metal | 34 + pcsx2/GS/Renderers/Metal/tfx.metal | 939 +++++++++ pcsx2/HostDisplay.cpp | 16 +- pcsx2/HostDisplay.h | 1 + 25 files changed, 5136 insertions(+), 17 deletions(-) create mode 100644 common/MRCHelpers.h create mode 100644 pcsx2/Frontend/MetalHostDisplay.h create mode 100644 pcsx2/Frontend/MetalHostDisplay.mm create mode 100644 pcsx2/GS/Renderers/Metal/GSDeviceMTL.h create mode 100644 pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm create mode 100644 pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h create mode 100644 pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm create mode 100644 pcsx2/GS/Renderers/Metal/GSMTLShaderCommon.h create mode 100644 pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h create mode 100644 pcsx2/GS/Renderers/Metal/GSMetalCPPAccessible.h create mode 100644 pcsx2/GS/Renderers/Metal/GSTextureMTL.h create mode 100644 
pcsx2/GS/Renderers/Metal/GSTextureMTL.mm create mode 100644 pcsx2/GS/Renderers/Metal/convert.metal create mode 100644 pcsx2/GS/Renderers/Metal/fxaa.metal create mode 100644 pcsx2/GS/Renderers/Metal/interlace.metal create mode 100644 pcsx2/GS/Renderers/Metal/merge.metal create mode 100644 pcsx2/GS/Renderers/Metal/tfx.metal diff --git a/bin/resources/shaders/common/fxaa.fx b/bin/resources/shaders/common/fxaa.fx index ccf926e253..3e2716d057 100644 --- a/bin/resources/shaders/common/fxaa.fx +++ b/bin/resources/shaders/common/fxaa.fx @@ -1,4 +1,4 @@ -#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK) +#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK) || defined(__METAL_VERSION__) #ifndef FXAA_GLSL_130 #define FXAA_GLSL_130 0 @@ -47,6 +47,8 @@ struct PS_OUTPUT float4 c : SV_Target0; }; +#elif defined(__METAL_VERSION__) +static constexpr sampler MAIN_SAMPLER(coord::normalized, address::clamp_to_edge, filter::linear); #endif /*------------------------------------------------------------------------------ @@ -63,6 +65,9 @@ struct PS_OUTPUT #elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1) #define FXAA_GATHER4_ALPHA 1 + +#elif defined(__METAL_VERSION__) +#define FXAA_GATHER4_ALPHA 1 #endif #if (FXAA_HLSL_5 == 1) @@ -98,6 +103,14 @@ struct FxaaTex { SamplerState smpl; Texture2D tex; }; #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) #endif +#elif defined(__METAL_VERSION__) +#define FxaaTex texture2d +#define FxaaTexTop(t, p) t.sample(MAIN_SAMPLER, p) +#define FxaaTexOff(t, p, o, r) t.sample(MAIN_SAMPLER, p, o) +#define FxaaTexAlpha4(t, p) t.gather(MAIN_SAMPLER, p, 0, component::w) +#define FxaaTexOffAlpha4(t, p, o) t.gather(MAIN_SAMPLER, p, o, component::w) +#define FxaaDiscard discard_fragment() +#define FxaaSat(x) saturate(x) #endif #define FxaaEdgeThreshold 0.063 @@ -151,14 +164,8 @@ float3 LinearToRGBGamma(float3 color, float gamma) return color; } -float4 PreGammaPass(float4 color, float2 uv0) +float4 
PreGammaPass(float4 color) { - #if (SHADER_MODEL >= 0x400) - color = Texture.Sample(TextureSampler, uv0); - #elif (FXAA_GLSL_130 == 1) - color = texture(TextureSampler, uv0); - #endif - const float GammaConst = 2.233; color.rgb = RGBGammaToLinear(color.rgb, GammaConst); color.rgb = LinearToRGBGamma(color.rgb, GammaConst); @@ -483,6 +490,8 @@ float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaS float4 FxaaPass(float4 FxaaColor, float2 uv0) #elif (SHADER_MODEL >= 0x400) float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0) +#elif defined(__METAL_VERSION__) +float4 FxaaPass(float4 FxaaColor, float2 uv0, texture2d tex) #endif { @@ -498,6 +507,9 @@ float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0) #elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1) vec2 PixelSize = textureSize(TextureSampler, 0); FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin); + #elif defined(__METAL_VERSION__) + float2 PixelSize = float2(tex.get_width(), tex.get_height()); + FxaaColor = FxaaPixelShader(uv0, tex, 1.f/PixelSize, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin); #endif return FxaaColor; @@ -511,7 +523,7 @@ float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0) void main() { vec4 color = texture(TextureSampler, PSin_t); - color = PreGammaPass(color, PSin_t); + color = PreGammaPass(color); color = FxaaPass(color, PSin_t); SV_Target0 = color; @@ -524,7 +536,7 @@ PS_OUTPUT ps_main(VS_OUTPUT input) float4 color = Texture.Sample(TextureSampler, input.t); - color = PreGammaPass(color, input.t); + color = PreGammaPass(color); color = FxaaPass(color, input.t); output.c = color; @@ -532,6 +544,7 @@ PS_OUTPUT ps_main(VS_OUTPUT input) return output; } +// Metal main function is in fxaa.metal #endif #endif diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index 3c098bb783..dbb4f0bfbd 100644 --- a/cmake/BuildParameters.cmake +++ 
b/cmake/BuildParameters.cmake @@ -305,9 +305,14 @@ endif() # MacOS-specific things #------------------------------------------------------------------------------- -set(CMAKE_OSX_DEPLOYMENT_TARGET 10.13) +if(NOT CMAKE_GENERATOR MATCHES "Xcode") + # Assume Xcode builds aren't being used for distribution + # Helpful because Xcode builds don't build multiple metallibs for different macOS versions + # Also helpful because Xcode's interactive shader debugger requires apps be built for the latest macOS + set(CMAKE_OSX_DEPLOYMENT_TARGET 10.13) +endif() -if (APPLE AND ${CMAKE_OSX_DEPLOYMENT_TARGET} VERSION_LESS 10.14 AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 9) +if (APPLE AND CMAKE_OSX_DEPLOYMENT_TARGET AND "${CMAKE_OSX_DEPLOYMENT_TARGET}" VERSION_LESS 10.14 AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 9) # Older versions of the macOS stdlib don't have operator new(size_t, align_val_t) # Disable use of them with this flag # Not great, but also no worse that what we were getting before we turned on C++17 diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 0e33d3776c..163f31aeff 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -86,6 +86,7 @@ target_sources(common PRIVATE MemcpyFast.h MemsetFast.inl MD5Digest.h + MRCHelpers.h Path.h PageFaultSource.h PrecompiledHeader.h @@ -185,8 +186,8 @@ elseif(APPLE) GL/ContextAGL.h ) set_source_files_properties(GL/ContextAGL.mm PROPERTIES SKIP_PRECOMPILE_HEADERS ON) - target_compile_options(common PUBLIC -fobjc-arc) - target_link_options(common PUBLIC -fobjc-link-runtime) + target_compile_options(common PRIVATE -fobjc-arc) + target_link_options(common PRIVATE -fobjc-link-runtime) else() if(X11_API OR WAYLAND_API) target_sources(common PRIVATE diff --git a/common/MRCHelpers.h b/common/MRCHelpers.h new file mode 100644 index 0000000000..b78f12bb97 --- /dev/null +++ b/common/MRCHelpers.h @@ -0,0 +1,97 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is 
free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#ifndef __OBJC__ + #error This header is for use with Objective-C++ only. +#endif + +#if __has_feature(objc_arc) + #error This file is for manual reference counting! Compile without -fobjc-arc +#endif + +#pragma once + +#include +#include + +/// Managed Obj-C pointer +template +class MRCOwned +{ + T ptr; + MRCOwned(T ptr): ptr(ptr) {} +public: + MRCOwned(): ptr(nullptr) {} + MRCOwned(std::nullptr_t): ptr(nullptr) {} + MRCOwned(MRCOwned&& other) + : ptr(other.ptr) + { + other.ptr = nullptr; + } + MRCOwned(const MRCOwned& other) + : ptr(other.ptr) + { + [ptr retain]; + } + ~MRCOwned() + { + if (ptr) + [ptr release]; + } + operator T() const { return ptr; } + MRCOwned& operator=(const MRCOwned& other) + { + [other.ptr retain]; + if (ptr) + [ptr release]; + ptr = other.ptr; + return *this; + } + MRCOwned& operator=(MRCOwned&& other) + { + std::swap(ptr, other.ptr); + return *this; + } + void Reset() + { + [ptr release]; + ptr = nullptr; + } + T Get() const { return ptr; } + static MRCOwned Transfer(T ptr) + { + return MRCOwned(ptr); + } + static MRCOwned Retain(T ptr) + { + [ptr retain]; + return MRCOwned(ptr); + } +}; + +/// Take ownership of an Obj-C pointer (equivalent to __bridge_transfer) +template +static inline MRCOwned MRCTransfer(T ptr) +{ + return MRCOwned::Transfer(ptr); +} + +/// Retain an Obj-C pointer (equivalent to __bridge) +template +static inline MRCOwned 
MRCRetain(T ptr) +{ + return MRCOwned::Retain(ptr); +} + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 55c57ec84b..ed064da644 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -789,6 +789,14 @@ if(USE_VULKAN) ) endif() +set(pcsx2GSMetalShaders + GS/Renderers/Metal/convert.metal + GS/Renderers/Metal/merge.metal + GS/Renderers/Metal/interlace.metal + GS/Renderers/Metal/tfx.metal + GS/Renderers/Metal/fxaa.metal +) + if(NOT PCSX2_CORE) list(APPEND pcsx2GSSources GS/Window/GSwxDialog.cpp @@ -1004,6 +1012,26 @@ if(WIN32) list(APPEND pcsx2FrontendHeaders Frontend/D3D11HostDisplay.h ) +elseif(APPLE) + list(APPEND pcsx2GSSources + GS/Renderers/Metal/GSDeviceMTL.mm + GS/Renderers/Metal/GSMTLDeviceInfo.mm + GS/Renderers/Metal/GSTextureMTL.mm + ) + list(APPEND pcsx2GSHeaders + GS/Renderers/Metal/GSDeviceMTL.h + GS/Renderers/Metal/GSMetalCPPAccessible.h + GS/Renderers/Metal/GSMTLDeviceInfo.h + GS/Renderers/Metal/GSMTLSharedHeader.h + GS/Renderers/Metal/GSMTLShaderCommon.h + GS/Renderers/Metal/GSTextureMTL.h + ) + list(APPEND pcsx2FrontendSources + Frontend/MetalHostDisplay.mm + ) + list(APPEND pcsx2FrontendHeaders + Frontend/MetalHostDisplay.h + ) endif() if(PCSX2_CORE) @@ -1710,6 +1738,49 @@ if(GETTEXT_FOUND AND NOT NO_TRANSLATION AND NOT PCSX2_CORE) endif() if (APPLE) + find_library(METAL_LIBRARY Metal) + target_link_libraries(PCSX2 PRIVATE ${METAL_LIBRARY}) + + if(CMAKE_GENERATOR MATCHES "Xcode") + # If we're generating an xcode project, you can just add the shaders to the main pcsx2 target and xcode will deal with them properly + # This will make sure xcode supplies code completion, etc (if you use a custom command, it won't) + set_target_properties(PCSX2 PROPERTIES + XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE + ) + foreach(shader IN LISTS pcsx2GSMetalShaders) + target_sources(PCSX2 PRIVATE ${shader}) + set_source_files_properties(${shader} PROPERTIES LANGUAGE METAL) + endforeach() + else() + function(generateMetallib std target 
outputName) + set(pcsx2GSMetalShaderOut) + set(flags + -ffast-math + $<$>:-gline-tables-only> + $<$>:-MO> + ) + foreach(shader IN LISTS pcsx2GSMetalShaders) + set(shaderOut ${CMAKE_CURRENT_BINARY_DIR}/${outputName}/${shader}.air) + list(APPEND pcsx2GSMetalShaderOut ${shaderOut}) + get_filename_component(shaderDir ${shaderOut} DIRECTORY) + add_custom_command(OUTPUT ${shaderOut} + COMMAND ${CMAKE_COMMAND} -E make_directory ${shaderDir} + COMMAND xcrun metal ${flags} -std=${std} -target ${target} -o ${shaderOut} -c ${CMAKE_CURRENT_SOURCE_DIR}/${shader} + DEPENDS ${shader} GS/Renderers/Metal/GSMTLSharedHeader.h GS/Renderers/Metal/GSMTLShaderCommon.h + ) + set(metallib ${CMAKE_CURRENT_BINARY_DIR}/${outputName}.metallib) + endforeach() + add_custom_command(OUTPUT ${metallib} + COMMAND xcrun metallib -o ${metallib} ${pcsx2GSMetalShaderOut} + DEPENDS ${pcsx2GSMetalShaderOut} + ) + pcsx2_resource(${metallib} ${CMAKE_CURRENT_BINARY_DIR}) + endfunction() + generateMetallib(macos-metal2.0 air64-apple-macos10.13 default) + generateMetallib(macos-metal2.2 air64-apple-macos10.15 Metal22) + generateMetallib(macos-metal2.3 air64-apple-macos11.0 Metal23) + endif() + # MacOS defaults to having a maximum protection of the __DATA segment of rw (non-executable) # We have a bunch of page-sized arrays in bss that we use for jit # Obviously not being able to make those arrays executable would be a problem @@ -1766,6 +1837,7 @@ source_group(System/Ps2/DEV9 REGULAR_EXPRESSION DEV9/*) source_group(System/Ps2/PAD FILES ${pcsx2PADSources} ${pcsx2PADHeaders}) source_group(System/Ps2/SPU2 REGULAR_EXPRESSION SPU2/*) source_group(System/Ps2/USB REGULAR_EXPRESSION USB/*) +source_group(System/Ps2/GS/Renderers/Metal REGULAR_EXPRESSION GS/Renderers/Metal/*) # Generated resource files source_group(Resources/GUI FILES ${pcsx2GuiResources}) diff --git a/pcsx2/Frontend/MetalHostDisplay.h b/pcsx2/Frontend/MetalHostDisplay.h new file mode 100644 index 0000000000..83d630abf0 --- /dev/null +++ 
b/pcsx2/Frontend/MetalHostDisplay.h @@ -0,0 +1,84 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +#include "HostDisplay.h" + +#ifndef __OBJC__ + #error "This header is for use with Objective-C++ only." +#endif + +#ifdef __APPLE__ + +#include "GS/Renderers/Metal/GSMTLDeviceInfo.h" +#include +#include +#include + +class MetalHostDisplay final : public HostDisplay +{ + MRCOwned m_view; + MRCOwned m_layer; + GSMTLDevice m_dev; + MRCOwned> m_queue; + MRCOwned> m_font_tex; + MRCOwned> m_current_drawable; + MRCOwned m_pass_desc; + u32 m_capture_start_frame; + + void AttachSurfaceOnMainThread(); + void DetachSurfaceOnMainThread(); + +public: + MetalHostDisplay(); + ~MetalHostDisplay(); + RenderAPI GetRenderAPI() const override; + void* GetRenderDevice() const override; + void* GetRenderContext() const override; + void* GetRenderSurface() const override; + + bool HasRenderDevice() const override; + bool HasRenderSurface() const override; + bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, VsyncMode vsync, bool threaded_presentation, bool debug_device) override; + bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override; + bool MakeRenderContextCurrent() override; + bool DoneRenderContextCurrent() override; + void DestroyRenderDevice() override; + void 
DestroyRenderSurface() override; + bool ChangeRenderWindow(const WindowInfo& wi) override; + bool SupportsFullscreen() const override; + bool IsFullscreen() override; + bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; + AdapterAndModeList GetAdapterAndModeList() override; + std::string GetDriverInfo() const override; + + void ResizeRenderWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; + + std::unique_ptr CreateTexture(u32 width, u32 height, const void* data, u32 data_stride, bool dynamic = false) override; + void UpdateTexture(id texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride); + void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) override; + bool BeginPresent(bool frame_skip) override; + void EndPresent() override; + void SetVSync(VsyncMode mode) override; + + bool CreateImGuiContext() override; + void DestroyImGuiContext() override; + bool UpdateImGuiFontTexture() override; + + bool GetHostRefreshRate(float* refresh_rate) override; +}; + +#endif diff --git a/pcsx2/Frontend/MetalHostDisplay.mm b/pcsx2/Frontend/MetalHostDisplay.mm new file mode 100644 index 0000000000..bcb25800b1 --- /dev/null +++ b/pcsx2/Frontend/MetalHostDisplay.mm @@ -0,0 +1,410 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "MetalHostDisplay.h" +#include "GS/Renderers/Metal/GSMetalCPPAccessible.h" +#include "GS/Renderers/Metal/GSDeviceMTL.h" +#include + +#ifdef __APPLE__ + +class MetalHostDisplayTexture final : public HostDisplayTexture +{ + MRCOwned> m_tex; + u32 m_width, m_height; +public: + MetalHostDisplayTexture(MRCOwned> tex, u32 width, u32 height) + : m_tex(std::move(tex)) + , m_width(width) + , m_height(height) + { + } + + void* GetHandle() const override { return (__bridge void*)m_tex; }; + u32 GetWidth() const override { return m_width; } + u32 GetHeight() const override { return m_height; } +}; + +HostDisplay* MakeMetalHostDisplay() +{ + return new MetalHostDisplay(); +} + +MetalHostDisplay::MetalHostDisplay() +{ +} + +MetalHostDisplay::~MetalHostDisplay() +{ +} + +HostDisplay::AdapterAndModeList GetMetalAdapterAndModeList() +{ @autoreleasepool { + HostDisplay::AdapterAndModeList list; + auto devs = MRCTransfer(MTLCopyAllDevices()); + for (id dev in devs.Get()) + list.adapter_names.push_back([[dev name] UTF8String]); + return list; +}} + +template +static void OnMainThread(Fn&& fn) +{ + if ([NSThread isMainThread]) + fn(); + else + dispatch_sync(dispatch_get_main_queue(), fn); +} + +HostDisplay::RenderAPI MetalHostDisplay::GetRenderAPI() const +{ + return RenderAPI::Metal; +} + +void* MetalHostDisplay::GetRenderDevice() const { return const_cast(static_cast(&m_dev)); } +void* MetalHostDisplay::GetRenderContext() const { return (__bridge void*)m_queue; } +void* MetalHostDisplay::GetRenderSurface() const { return (__bridge void*)m_layer; } +bool MetalHostDisplay::HasRenderDevice() const { return m_dev.IsOk(); } +bool MetalHostDisplay::HasRenderSurface() const { return static_cast(m_layer);} + +void MetalHostDisplay::AttachSurfaceOnMainThread() +{ + ASSERT([NSThread isMainThread]); + m_view = MRCRetain((__bridge 
NSView*)m_window_info.window_handle); + [m_view setWantsLayer:YES]; + [m_view setLayer:m_layer]; +} + +void MetalHostDisplay::DetachSurfaceOnMainThread() +{ + ASSERT([NSThread isMainThread]); + [m_view setLayer:nullptr]; + [m_view setWantsLayer:NO]; + m_view = nullptr; +} + +bool MetalHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, VsyncMode vsync, bool threaded_presentation, bool debug_device) +{ @autoreleasepool { + m_window_info = wi; + pxAssertRel(!m_dev.dev, "Device already created!"); + std::string null_terminated_adapter_name(adapter_name); + NSString* ns_adapter_name = [NSString stringWithUTF8String:null_terminated_adapter_name.c_str()]; + auto devs = MRCTransfer(MTLCopyAllDevices()); + for (id dev in devs.Get()) + { + if ([[dev name] isEqualToString:ns_adapter_name]) + m_dev = GSMTLDevice(MRCRetain(dev)); + } + if (!m_dev.dev) + { + if (!adapter_name.empty()) + Console.Warning("Metal: Couldn't find adapter %s, using default", null_terminated_adapter_name.c_str()); + m_dev = GSMTLDevice(MRCTransfer(MTLCreateSystemDefaultDevice())); + } + m_queue = MRCTransfer([m_dev.dev newCommandQueue]); + + m_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]); + [m_pass_desc colorAttachments][0].loadAction = MTLLoadActionClear; + [m_pass_desc colorAttachments][0].clearColor = MTLClearColorMake(0, 0, 0, 0); + [m_pass_desc colorAttachments][0].storeAction = MTLStoreActionStore; + + m_capture_start_frame = 0; + if (char* env = getenv("MTL_CAPTURE")) + { + m_capture_start_frame = atoi(env); + } + if (m_capture_start_frame) + { + Console.WriteLn("Metal will capture frame %u", m_capture_start_frame); + } + + if (m_dev.IsOk() && m_queue) + { + OnMainThread([this] + { + m_layer = MRCRetain([CAMetalLayer layer]); + [m_layer setDrawableSize:CGSizeMake(m_window_info.surface_width, m_window_info.surface_height)]; + [m_layer setDevice:m_dev.dev]; + AttachSurfaceOnMainThread(); + }); + SetVSync(vsync); + return true; + } + else + return false; 
+}} + +bool MetalHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) +{ + return true; +} + +bool MetalHostDisplay::MakeRenderContextCurrent() { return true; } +bool MetalHostDisplay::DoneRenderContextCurrent() { return true; } + +void MetalHostDisplay::DestroyRenderDevice() +{ + DestroyRenderSurface(); + m_queue = nullptr; + m_dev.Reset(); +} + +void MetalHostDisplay::DestroyRenderSurface() +{ + if (!m_layer) + return; + OnMainThread([this]{ DetachSurfaceOnMainThread(); }); + m_layer = nullptr; +} + +bool MetalHostDisplay::ChangeRenderWindow(const WindowInfo& wi) +{ + OnMainThread([this, &wi] + { + DetachSurfaceOnMainThread(); + m_window_info = wi; + AttachSurfaceOnMainThread(); + }); + return true; +} + +bool MetalHostDisplay::SupportsFullscreen() const { return false; } +bool MetalHostDisplay::IsFullscreen() { return false; } +bool MetalHostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) { return false; } + +HostDisplay::AdapterAndModeList MetalHostDisplay::GetAdapterAndModeList() +{ + return GetMetalAdapterAndModeList(); +} + +std::string MetalHostDisplay::GetDriverInfo() const +{ @autoreleasepool { + std::string desc([[m_dev.dev description] UTF8String]); + desc += "\n Texture Swizzle: " + std::string(m_dev.features.texture_swizzle ? "Supported" : "Unsupported"); + desc += "\n Unified Memory: " + std::string(m_dev.features.unified_memory ? "Supported" : "Unsupported"); + desc += "\n Framebuffer Fetch: " + std::string(m_dev.features.framebuffer_fetch ? "Supported" : "Unsupported"); + desc += "\n Primitive ID: " + std::string(m_dev.features.primid ? 
"Supported" : "Unsupported"); + desc += "\n Shader Version: " + std::string(to_string(m_dev.features.shader_version)); + desc += "\n Max Texture Size: " + std::to_string(m_dev.features.max_texsize); + return desc; +}} + +void MetalHostDisplay::ResizeRenderWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) +{ + m_window_info.surface_scale = new_window_scale; + if (m_window_info.surface_width == static_cast(new_window_width) && m_window_info.surface_height == static_cast(new_window_height)) + return; + m_window_info.surface_width = new_window_width; + m_window_info.surface_height = new_window_height; + @autoreleasepool + { + [m_layer setDrawableSize:CGSizeMake(new_window_width, new_window_height)]; + } +} + +std::unique_ptr MetalHostDisplay::CreateTexture(u32 width, u32 height, const void* data, u32 data_stride, bool dynamic) +{ @autoreleasepool { + MTLTextureDescriptor* desc = [MTLTextureDescriptor + texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:width + height:height + mipmapped:false]; + [desc setUsage:MTLTextureUsageShaderRead]; + [desc setStorageMode:MTLStorageModePrivate]; + MRCOwned> tex = MRCTransfer([m_dev.dev newTextureWithDescriptor:desc]); + if (!tex) + return nullptr; // Something broke yay + [tex setLabel:@"MetalHostDisplay Texture"]; + if (data) + UpdateTexture(tex, 0, 0, width, height, data, data_stride); + return std::make_unique(std::move(tex), width, height); +}} + +void MetalHostDisplay::UpdateTexture(id texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) +{ + id cmdbuf = [m_queue commandBuffer]; + id enc = [cmdbuf blitCommandEncoder]; + size_t bytes = data_stride * height; + MRCOwned> buf = MRCTransfer([m_dev.dev newBufferWithLength:bytes options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined]); + memcpy([buf contents], data, bytes); + [enc copyFromBuffer:buf + sourceOffset:0 + sourceBytesPerRow:data_stride + sourceBytesPerImage:bytes + 
sourceSize:MTLSizeMake(width, height, 1) + toTexture:texture + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(x, y, 0)]; // write at the requested (x, y) offset instead of always the top-left corner + [enc endEncoding]; + [cmdbuf commit]; +} + +void MetalHostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) +{ @autoreleasepool { + UpdateTexture((__bridge id)texture->GetHandle(), x, y, width, height, data, data_stride); +}} + +static bool s_capture_next = false; + +bool MetalHostDisplay::BeginPresent(bool frame_skip) +{ @autoreleasepool { + GSDeviceMTL* dev = static_cast(g_gs_device.get()); + if (dev && m_capture_start_frame && dev->FrameNo() == m_capture_start_frame) + s_capture_next = true; + if (frame_skip || m_window_info.type == WindowInfo::Type::Surfaceless || !g_gs_device) + { + ImGui::EndFrame(); + return false; + } + id buf = dev->GetRenderCmdBuf(); + m_current_drawable = MRCRetain([m_layer nextDrawable]); + dev->EndRenderPass(); + if (!m_current_drawable) + { + [buf pushDebugGroup:@"Present Skipped"]; + [buf popDebugGroup]; + dev->FlushEncoders(); + ImGui::EndFrame(); + return false; + } + [m_pass_desc colorAttachments][0].texture = [m_current_drawable texture]; + id enc = [buf renderCommandEncoderWithDescriptor:m_pass_desc]; + [enc setLabel:@"Present"]; + dev->m_current_render.encoder = MRCRetain(enc); + return true; +}} + +void MetalHostDisplay::EndPresent() +{ @autoreleasepool { + GSDeviceMTL* dev = static_cast(g_gs_device.get()); + pxAssertDev(dev && dev->m_current_render.encoder && dev->m_current_render_cmdbuf, "BeginPresent cmdbuf was destroyed"); + ImGui::Render(); + dev->RenderImGui(ImGui::GetDrawData()); + dev->EndRenderPass(); + if (m_current_drawable) + [dev->m_current_render_cmdbuf addScheduledHandler:[drawable = std::move(m_current_drawable)](id){ + [drawable present]; + }]; + dev->FlushEncoders(); + m_current_drawable = nullptr; + if (m_capture_start_frame) + { + if (@available(macOS 10.15, iOS 13, *)) + { + static 
NSString* const path = @"/tmp/PCSX2MTLCapture.gputrace"; + static u32 frames; + if (frames) + { + --frames; + if (!frames) + { + [[MTLCaptureManager sharedCaptureManager] stopCapture]; + Console.WriteLn("Metal Trace Capture to /tmp/PCSX2MTLCapture.gputrace finished"); + [[NSWorkspace sharedWorkspace] selectFile:path + inFileViewerRootedAtPath:@"/tmp/"]; + } + } + else if (s_capture_next) + { + s_capture_next = false; + MTLCaptureManager* mgr = [MTLCaptureManager sharedCaptureManager]; + if ([mgr supportsDestination:MTLCaptureDestinationGPUTraceDocument]) + { + MTLCaptureDescriptor* desc = [[MTLCaptureDescriptor new] autorelease]; + [desc setCaptureObject:m_dev.dev]; + if ([[NSFileManager defaultManager] fileExistsAtPath:path]) + [[NSFileManager defaultManager] removeItemAtPath:path error:nil]; + [desc setOutputURL:[NSURL fileURLWithPath:path]]; + [desc setDestination:MTLCaptureDestinationGPUTraceDocument]; + NSError* err = nullptr; + [mgr startCaptureWithDescriptor:desc error:&err]; + if (err) + { + Console.Error("Metal Trace Capture failed: %s", [[err localizedDescription] UTF8String]); + } + else + { + Console.WriteLn("Metal Trace Capture to /tmp/PCSX2MTLCapture.gputrace started"); + frames = 2; + } + } + else + { + Console.Error("Metal Trace Capture Failed: MTLCaptureManager doesn't support GPU trace documents! (Did you forget to run with METAL_CAPTURE_ENABLED=1?)"); + } + } + } + } +}} + +void MetalHostDisplay::SetVSync(VsyncMode mode) +{ + [m_layer setDisplaySyncEnabled:mode != VsyncMode::Off]; + m_vsync_mode = mode; +} + +bool MetalHostDisplay::CreateImGuiContext() +{ + ImGuiIO& io = ImGui::GetIO(); + io.BackendRendererName = "pcsx2_imgui_metal"; + io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. 
+ return true; +} + +void MetalHostDisplay::DestroyImGuiContext() +{ + ImGui::GetIO().Fonts->SetTexID(nullptr); +} + +bool MetalHostDisplay::UpdateImGuiFontTexture() +{ @autoreleasepool { + u8* data; + int width, height; + ImFontAtlas* fonts = ImGui::GetIO().Fonts; + fonts->GetTexDataAsAlpha8(&data, &width, &height); + MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatA8Unorm width:width height:height mipmapped:false]; + [desc setUsage:MTLTextureUsageShaderRead]; + [desc setStorageMode:MTLStorageModePrivate]; + if (@available(macOS 10.15, *)) + if (m_dev.features.texture_swizzle) + [desc setSwizzle:MTLTextureSwizzleChannelsMake(MTLTextureSwizzleOne, MTLTextureSwizzleOne, MTLTextureSwizzleOne, MTLTextureSwizzleAlpha)]; + m_font_tex = MRCTransfer([m_dev.dev newTextureWithDescriptor:desc]); + [m_font_tex setLabel:@"ImGui Font"]; + UpdateTexture(m_font_tex, 0, 0, width, height, data, width); + fonts->SetTexID((__bridge void*)m_font_tex); + return static_cast(m_font_tex); +}} + +bool MetalHostDisplay::GetHostRefreshRate(float* refresh_rate) +{ + OnMainThread([this, refresh_rate] + { + u32 did = [[[[[m_view window] screen] deviceDescription] valueForKey:@"NSScreenNumber"] unsignedIntValue]; + if (CGDisplayModeRef mode = CGDisplayCopyDisplayMode(did)) + { + *refresh_rate = CGDisplayModeGetRefreshRate(mode); + CGDisplayModeRelease(mode); + } + else + { + *refresh_rate = 0; + } + }); + return *refresh_rate != 0; +} + +#endif // __APPLE__ diff --git a/pcsx2/GS/GSRegs.h b/pcsx2/GS/GSRegs.h index 9caeb8f1f2..c5b74cc371 100644 --- a/pcsx2/GS/GSRegs.h +++ b/pcsx2/GS/GSRegs.h @@ -18,7 +18,8 @@ // clang-format off // MacOS headers define PAGE_SIZE to the size of an x86 page -#ifdef PAGE_SIZE +#ifdef __APPLE__ + #include #undef PAGE_SIZE #endif diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 31b3ebd28c..3b9de64279 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ 
b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -797,4 +797,7 @@ struct GSAdapter #endif }; +template <> +struct std::hash : public GSHWDrawConfig::PSSelectorHash {}; + extern std::unique_ptr g_gs_device; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h new file mode 100644 index 0000000000..b39797a4cb --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h @@ -0,0 +1,398 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +#include "GS/Renderers/Common/GSDevice.h" + +#ifndef __OBJC__ + #error "This header is for use with Objective-C++ only." 
+#endif + +#ifdef __APPLE__ + +#include "common/HashCombine.h" +#include "common/MRCHelpers.h" +#include "GS/GS.h" +#include "GSMTLDeviceInfo.h" +#include "GSMTLSharedHeader.h" +#include +#include +#include +#include +#include +#include + +struct PipelineSelectorExtrasMTL +{ + union + { + struct + { + GSTexture::Format rt : 4; + u8 writemask : 4; + GSDevice::BlendFactor src_factor : 4; + GSDevice::BlendFactor dst_factor : 4; + GSDevice::BlendOp blend_op : 2; + bool blend_enable : 1; + bool has_depth : 1; + bool has_stencil : 1; + }; + u8 _key[3]; + }; + u32 fullkey() { return _key[0] | (_key[1] << 8) | (_key[2] << 16); } + + PipelineSelectorExtrasMTL(): _key{} {} + PipelineSelectorExtrasMTL(GSHWDrawConfig::BlendState blend, GSTexture* rt, GSHWDrawConfig::ColorMaskSelector cms, bool has_depth, bool has_stencil) + : _key{} + { + this->rt = rt ? rt->GetFormat() : GSTexture::Format::Invalid; + MTLColorWriteMask mask = MTLColorWriteMaskNone; + if (cms.wr) mask |= MTLColorWriteMaskRed; + if (cms.wg) mask |= MTLColorWriteMaskGreen; + if (cms.wb) mask |= MTLColorWriteMaskBlue; + if (cms.wa) mask |= MTLColorWriteMaskAlpha; + this->writemask = mask; + this->src_factor = static_cast(blend.src_factor); + this->dst_factor = static_cast(blend.dst_factor); + this->blend_op = static_cast(blend.op); + this->blend_enable = blend.enable; + this->has_depth = has_depth; + this->has_stencil = has_stencil; + } +}; +struct PipelineSelectorMTL +{ + GSHWDrawConfig::PSSelector ps; + PipelineSelectorExtrasMTL extras; + GSHWDrawConfig::VSSelector vs; + PipelineSelectorMTL() + { + memset(this, 0, sizeof(*this)); + } + PipelineSelectorMTL(GSHWDrawConfig::VSSelector vs, GSHWDrawConfig::PSSelector ps, PipelineSelectorExtrasMTL extras) + { + memset(this, 0, sizeof(*this)); + this->vs = vs; + this->ps = ps; + this->extras = extras; + } + PipelineSelectorMTL(const PipelineSelectorMTL& other) + { + memcpy(this, &other, sizeof(other)); + } + PipelineSelectorMTL& operator=(const PipelineSelectorMTL& 
other) + { + memcpy(this, &other, sizeof(other)); + return *this; + } + bool operator==(const PipelineSelectorMTL& other) const + { + return BitEqual(*this, other); + } +}; + +static_assert(sizeof(PipelineSelectorMTL) == 16); + +template <> +struct std::hash +{ + size_t operator()(const PipelineSelectorMTL& sel) const + { + size_t h = 0; + size_t pieces[(sizeof(PipelineSelectorMTL) + sizeof(size_t) - 1) / sizeof(size_t)] = {}; + memcpy(pieces, &sel, sizeof(PipelineSelectorMTL)); + for (auto& piece : pieces) + HashCombine(h, piece); + return h; + } +}; + +class GSScopedDebugGroupMTL +{ + id m_buffer; +public: + GSScopedDebugGroupMTL(id buffer, NSString* name): m_buffer(buffer) + { + [m_buffer pushDebugGroup:name]; + } + ~GSScopedDebugGroupMTL() + { + [m_buffer popDebugGroup]; + } +}; + +struct ImDrawData; +class GSTextureMTL; + +class GSDeviceMTL final : public GSDevice +{ +public: + using DepthStencilSelector = GSHWDrawConfig::DepthStencilSelector; + using SamplerSelector = GSHWDrawConfig::SamplerSelector; + enum class LoadAction + { + DontCare, + DontCareIfFull, + Load, + }; + class UsageTracker + { + struct UsageEntry + { + u64 drawno; + size_t pos; + }; + std::vector m_usage; + size_t m_size = 0; + size_t m_pos = 0; + public: + size_t Size() { return m_size; } + size_t Pos() { return m_pos; } + bool PrepareForAllocation(u64 last_draw, size_t amt); + size_t Allocate(u64 current_draw, size_t amt); + void Reset(size_t new_size); + }; + struct Map + { + id gpu_buffer; + size_t gpu_offset; + void* cpu_buffer; + }; + struct UploadBuffer + { + UsageTracker usage; + MRCOwned> mtlbuffer; + void* buffer = nullptr; + }; + struct BufferPair + { + UsageTracker usage; + MRCOwned> cpubuffer; + MRCOwned> gpubuffer; + void* buffer = nullptr; + size_t last_upload = 0; + }; + + struct ConvertShaderVertex + { + simd_float2 pos; + simd_float2 texpos; + }; + + struct VSSelector + { + union + { + struct + { + bool iip : 1; + bool fst : 1; + bool point_size : 1; + }; + u8 key; + }; + 
VSSelector(): key(0) {} + VSSelector(u8 key): key(key) {} + }; + + using PSSelector = GSHWDrawConfig::PSSelector; + + // MARK: Configuration + int m_mipmap; + + // MARK: Permanent resources + std::shared_ptr> m_backref; + GSMTLDevice m_dev; + MRCOwned> m_queue; + MRCOwned> m_draw_sync_fence; + MRCOwned m_fn_constants; + MRCOwned m_hw_vertex; + std::unique_ptr m_font; + + // Draw IDs are used to make sure we're not clobbering things + u64 m_current_draw = 1; + std::atomic m_last_finished_draw{0}; + + // Functions and Pipeline States + MRCOwned> m_convert_pipeline[static_cast(ShaderConvert::Count)]; + MRCOwned> m_present_pipeline[static_cast(ShaderConvert::Count)]; + MRCOwned> m_convert_pipeline_copy[2]; + MRCOwned> m_convert_pipeline_copy_mask[1 << 4]; + MRCOwned> m_merge_pipeline[4]; + MRCOwned> m_interlace_pipeline[4]; + MRCOwned> m_datm_pipeline[2]; + MRCOwned> m_stencil_clear_pipeline; + MRCOwned> m_primid_init_pipeline[2][2]; + MRCOwned> m_hdr_init_pipeline; + MRCOwned> m_hdr_resolve_pipeline; + MRCOwned> m_fxaa_pipeline; + MRCOwned> m_shadeboost_pipeline; + MRCOwned> m_imgui_pipeline; + MRCOwned> m_imgui_pipeline_a8; + + MRCOwned> m_hw_vs[1 << 3]; + std::unordered_map>> m_hw_ps; + std::unordered_map>> m_hw_pipeline; + + MRCOwned m_render_pass_desc[8]; + + MRCOwned> m_sampler_hw[1 << 8]; + + MRCOwned> m_dss_stencil_zero; + MRCOwned> m_dss_stencil_write; + MRCOwned> m_dss_hw[1 << 5]; + + MRCOwned> m_texture_download_buf; + UploadBuffer m_texture_upload_buf; + BufferPair m_vertex_upload_buf; + + // MARK: Ephemeral resources + MRCOwned> m_current_render_cmdbuf; + struct MainRenderEncoder + { + MRCOwned> encoder; + GSTexture* color_target = nullptr; + GSTexture* depth_target = nullptr; + GSTexture* stencil_target = nullptr; + GSTexture* tex[8] = {}; + void* vertex_buffer = nullptr; + void* name = nullptr; + struct Has + { + bool cb_vs : 1; + bool cb_ps : 1; + bool scissor : 1; + bool blend_color : 1; + bool pipeline_sel : 1; + bool sampler : 1; + } has; + 
DepthStencilSelector depth_sel = DepthStencilSelector::NoDepth(); + // Clear line (Things below here are tracked by `has` and don't need to be cleared to reset) + SamplerSelector sampler_sel; + u8 blend_color; + GSVector4i scissor; + PipelineSelectorMTL pipeline_sel; + GSHWDrawConfig::VSConstantBuffer cb_vs; + GSHWDrawConfig::PSConstantBuffer cb_ps; + MainRenderEncoder(const MainRenderEncoder&) = delete; + MainRenderEncoder() = default; + } m_current_render; + MRCOwned> m_texture_upload_cmdbuf; + MRCOwned> m_texture_upload_encoder; + MRCOwned> m_late_texture_upload_encoder; + MRCOwned> m_vertex_upload_cmdbuf; + MRCOwned> m_vertex_upload_encoder; + + struct DebugEntry + { + enum Op { Push, Insert, Pop } op; + MRCOwned str; + DebugEntry(Op op, MRCOwned str): op(op), str(std::move(str)) {} + }; + + std::vector m_debug_entries; + u32 m_debug_group_level = 0; + + GSDeviceMTL(); + ~GSDeviceMTL() override; + + /// Allocate space in the given buffer + Map Allocate(UploadBuffer& buffer, size_t amt); + /// Allocate space in the given buffer for use with the given render command encoder + Map Allocate(BufferPair& buffer, size_t amt); + /// Enqueue upload of any outstanding data + void Sync(BufferPair& buffer); + /// Get the texture upload encoder, creating a new one if it doesn't exist + id GetTextureUploadEncoder(); + /// Get the late texture upload encoder, creating a new one if it doesn't exist + id GetLateTextureUploadEncoder(); + /// Get the vertex upload encoder, creating a new one if it doesn't exist + id GetVertexUploadEncoder(); + /// Get the render command buffer, creating a new one if it doesn't exist + id GetRenderCmdBuf(); + /// Flush pending operations from all encoders to the GPU + void FlushEncoders(); + /// End current render pass without flushing + void EndRenderPass(); + /// Begin a new render pass (may reuse existing) + void BeginRenderPass(NSString* name, GSTexture* color, MTLLoadAction color_load, GSTexture* depth, MTLLoadAction depth_load, GSTexture* 
stencil = nullptr, MTLLoadAction stencil_load = MTLLoadActionDontCare); + + GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) override; + + void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) override; + void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) override; + void DoFXAA(GSTexture* sTex, GSTexture* dTex) override; + void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) override; + void DoExternalFX(GSTexture* sTex, GSTexture* dTex) override; + + MRCOwned> LoadShader(NSString* name); + MRCOwned> MakePipeline(MTLRenderPipelineDescriptor* desc, id vertex, id fragment, NSString* name); + bool Create(HostDisplay* display) override; + + void ClearRenderTarget(GSTexture* t, const GSVector4& c) override; + void ClearRenderTarget(GSTexture* t, u32 c) override; + void ClearDepth(GSTexture* t) override; + void ClearStencil(GSTexture* t, u8 c) override; + + bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override; + + void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) override; + void DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, id pipeline, bool linear, LoadAction load_action, void* frag_uniform, size_t frag_uniform_len); + void DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds); + /// Copy from a position in sTex to the same position in the currently active render encoder using the given fs pipeline and rect + void RenderCopy(GSTexture* sTex, id pipeline, const GSVector4i& rect); + void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override; + void StretchRect(GSTexture* sTex, const GSVector4& 
sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override; + + void FlushClears(GSTexture* tex); + + // MARK: Main Render Encoder operations + void MRESetHWPipelineState(GSHWDrawConfig::VSSelector vs, GSHWDrawConfig::PSSelector ps, GSHWDrawConfig::BlendState blend, GSHWDrawConfig::ColorMaskSelector cms); + void MRESetDSS(DepthStencilSelector sel); + void MRESetDSS(id dss); + void MRESetSampler(SamplerSelector sel); + void MRESetTexture(GSTexture* tex, int pos); + void MRESetVertices(id buffer, size_t offset); + void MRESetScissor(const GSVector4i& scissor); + void MREClearScissor(); + void MRESetCB(const GSHWDrawConfig::VSConstantBuffer& cb_vs); + void MRESetCB(const GSHWDrawConfig::PSConstantBuffer& cb_ps); + void MRESetBlendColor(u8 blend_color); + void MRESetPipeline(id pipe); + void MREInitHWDraw(GSHWDrawConfig& config, const Map& verts); + + // MARK: Render HW + + void SetupDestinationAlpha(GSTexture* rt, GSTexture* ds, const GSVector4i& r, bool datm); + void RenderHW(GSHWDrawConfig& config) override; + void SendHWDraw(GSHWDrawConfig& config, id enc, id buffer, size_t off); + + // MARK: Debug + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...) 
override; + void ProcessDebugEntry(id enc, const DebugEntry& entry); + void FlushDebugEntries(id enc); + void EndDebugGroup(id enc); + + // MARK: ImGui + + void RenderImGui(ImDrawData* data); + u32 FrameNo() const { return m_frame; } +}; + +#endif // __APPLE__ diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm new file mode 100644 index 0000000000..35d4aa0e74 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -0,0 +1,1795 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "PrecompiledHeader.h" +#include "GSMetalCPPAccessible.h" +#include "GSDeviceMTL.h" +#include "GSTextureMTL.h" +#include "GS/GSPerfMon.h" +#include "HostDisplay.h" +#include + +#ifdef __APPLE__ +#include "GSMTLSharedHeader.h" + +GSDevice* MakeGSDeviceMTL() +{ + return new GSDeviceMTL(); +} + +bool GSDeviceMTL::UsageTracker::PrepareForAllocation(u64 last_draw, size_t amt) +{ + auto removeme = std::find_if(m_usage.begin(), m_usage.end(), [last_draw](UsageEntry usage){ return usage.drawno > last_draw; }); + if (removeme != m_usage.begin()) + m_usage.erase(m_usage.begin(), removeme); + + bool still_in_use = false; + bool needs_wrap = m_pos + amt > m_size; + if (!m_usage.empty()) + { + size_t used = m_usage.front().pos; + if (needs_wrap) + still_in_use = used >= m_pos || used < amt; + else + still_in_use = used >= m_pos && used < m_pos + amt; + } + if (needs_wrap) + m_pos = 0; + + return still_in_use || amt > m_size; +} + +size_t GSDeviceMTL::UsageTracker::Allocate(u64 current_draw, size_t amt) +{ + if (m_usage.empty() || m_usage.back().drawno != current_draw) + m_usage.push_back({current_draw, m_pos}); + size_t ret = m_pos; + m_pos += amt; + return ret; +} + +void GSDeviceMTL::UsageTracker::Reset(size_t new_size) +{ + m_usage.clear(); + m_size = new_size; + m_pos = 0; +} + +GSDeviceMTL::GSDeviceMTL() + : m_backref(std::make_shared>()) + , m_dev(nil) +{ + m_backref->second = this; + m_mipmap = theApp.GetConfigI("mipmap"); +} + +GSDeviceMTL::~GSDeviceMTL() +{ @autoreleasepool { + FlushEncoders(); + std::lock_guard guard(m_backref->first); + m_backref->second = nullptr; +}} + +GSDeviceMTL::Map GSDeviceMTL::Allocate(UploadBuffer& buffer, size_t amt) +{ + amt = (amt + 31) & ~31ull; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + if (unlikely(needs_new)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + 
newsize *= 2; + MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.mtlbuffer = MRCTransfer([m_dev.dev newBufferWithLength:newsize options:options]); + pxAssertRel(buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.mtlbuffer contents]; + buffer.usage.Reset(newsize); + } + + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + + Map ret = {buffer.mtlbuffer, pos, reinterpret_cast(buffer.buffer) + pos}; + ASSERT(pos <= buffer.usage.Size() && "Previous code should have guaranteed there was enough space"); + return ret; +} + +/// Allocate space in the given buffer for use with the given render command encoder +GSDeviceMTL::Map GSDeviceMTL::Allocate(BufferPair& buffer, size_t amt) +{ + amt = (amt + 31) & ~31ull; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + size_t base_pos = buffer.usage.Pos(); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + bool needs_upload = needs_new || buffer.usage.Pos() == 0; + if (!m_dev.features.unified_memory && needs_upload) + { + if (base_pos != buffer.last_upload) + { + id enc = GetVertexUploadEncoder(); + [enc copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:base_pos - buffer.last_upload]; + } + buffer.last_upload = 0; + } + if (unlikely(needs_new)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + newsize *= 2; + MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.cpubuffer = MRCTransfer([m_dev.dev newBufferWithLength:newsize options:options]); + pxAssertRel(buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.cpubuffer contents]; + buffer.usage.Reset(newsize); + if (!m_dev.features.unified_memory) + { + options = MTLResourceStorageModePrivate | 
MTLResourceHazardTrackingModeUntracked; + buffer.gpubuffer = MRCTransfer([m_dev.dev newBufferWithLength:newsize options:options]); + pxAssertRel(buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + } + } + + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + Map ret = {nil, pos, reinterpret_cast(buffer.buffer) + pos}; + ret.gpu_buffer = m_dev.features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer; + ASSERT(pos <= buffer.usage.Size() && "Previous code should have guaranteed there was enough space"); + return ret; +} + +void GSDeviceMTL::Sync(BufferPair& buffer) +{ + if (m_dev.features.unified_memory || buffer.usage.Pos() == buffer.last_upload) + return; + + id enc = GetVertexUploadEncoder(); + [enc copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:buffer.usage.Pos() - buffer.last_upload]; + [enc updateFence:m_draw_sync_fence]; + buffer.last_upload = buffer.usage.Pos(); +} + +id GSDeviceMTL::GetTextureUploadEncoder() +{ + if (!m_texture_upload_cmdbuf) + { + m_texture_upload_cmdbuf = MRCRetain([m_queue commandBuffer]); + m_texture_upload_encoder = MRCRetain([m_texture_upload_cmdbuf blitCommandEncoder]); + pxAssertRel(m_texture_upload_encoder, "Failed to create texture upload encoder!"); + [m_texture_upload_cmdbuf setLabel:@"Texture Upload"]; + } + return m_texture_upload_encoder; +} + +id GSDeviceMTL::GetLateTextureUploadEncoder() +{ + if (!m_late_texture_upload_encoder) + { + EndRenderPass(); + m_late_texture_upload_encoder = MRCRetain([GetRenderCmdBuf() blitCommandEncoder]); + pxAssertRel(m_late_texture_upload_encoder, "Failed to create late texture upload encoder!"); + [m_late_texture_upload_encoder setLabel:@"Late Texture Upload"]; + if (!m_dev.features.unified_memory) + [m_late_texture_upload_encoder waitForFence:m_draw_sync_fence]; + } + return m_late_texture_upload_encoder; +} + +id GSDeviceMTL::GetVertexUploadEncoder() +{ + if 
(!m_vertex_upload_cmdbuf) + { + m_vertex_upload_cmdbuf = MRCRetain([m_queue commandBuffer]); + m_vertex_upload_encoder = MRCRetain([m_vertex_upload_cmdbuf blitCommandEncoder]); + pxAssertRel(m_vertex_upload_encoder, "Failed to create vertex upload encoder!"); + [m_vertex_upload_cmdbuf setLabel:@"Vertex Upload"]; + } + return m_vertex_upload_encoder; +} + +/// Get the draw command buffer, creating a new one if it doesn't exist +id GSDeviceMTL::GetRenderCmdBuf() +{ + if (!m_current_render_cmdbuf) + { + m_current_render_cmdbuf = MRCRetain([m_queue commandBuffer]); + pxAssertRel(m_current_render_cmdbuf, "Failed to create draw command buffer!"); + [m_current_render_cmdbuf setLabel:@"Draw"]; + } + return m_current_render_cmdbuf; +} + +void GSDeviceMTL::FlushEncoders() +{ + if (!m_current_render_cmdbuf) + return; + EndRenderPass(); + Sync(m_vertex_upload_buf); + if (m_dev.features.unified_memory) + { + ASSERT(!m_vertex_upload_cmdbuf && "Should never be used!"); + } + else if (m_vertex_upload_cmdbuf) + { + [m_vertex_upload_encoder endEncoding]; + [m_vertex_upload_cmdbuf commit]; + m_vertex_upload_encoder = nil; + m_vertex_upload_cmdbuf = nil; + } + if (m_texture_upload_cmdbuf) + { + [m_texture_upload_encoder endEncoding]; + [m_texture_upload_cmdbuf commit]; + m_texture_upload_encoder = nil; + m_texture_upload_cmdbuf = nil; + } + if (m_late_texture_upload_encoder) + { + [m_late_texture_upload_encoder endEncoding]; + m_late_texture_upload_encoder = nil; + } + [m_current_render_cmdbuf addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) + { + std::lock_guard guard(backref->first); + if (GSDeviceMTL* dev = backref->second) + { + // We can do the update non-atomically because we only ever update under the lock + u64 newval = std::max(draw, dev->m_last_finished_draw.load(std::memory_order_relaxed)); + dev->m_last_finished_draw.store(newval, std::memory_order_release); + } + }]; + [m_current_render_cmdbuf commit]; + m_current_render_cmdbuf = nil; + 
m_current_draw++; +} + +void GSDeviceMTL::EndRenderPass() +{ + if (m_current_render.encoder) + { + EndDebugGroup(m_current_render.encoder); + [m_current_render.encoder endEncoding]; + m_current_render.encoder = nil; + memset(&m_current_render, 0, offsetof(MainRenderEncoder, depth_sel)); + m_current_render.depth_sel = DepthStencilSelector::NoDepth(); + } +} + +void GSDeviceMTL::BeginRenderPass(NSString* name, GSTexture* color, MTLLoadAction color_load, GSTexture* depth, MTLLoadAction depth_load, GSTexture* stencil, MTLLoadAction stencil_load) +{ + GSTextureMTL* mc = static_cast(color); + GSTextureMTL* md = static_cast(depth); + GSTextureMTL* ms = static_cast(stencil); + bool needs_new = color != m_current_render.color_target + || depth != m_current_render.depth_target + || stencil != m_current_render.stencil_target; + GSVector4 color_clear; + float depth_clear; + int stencil_clear; + bool needs_color_clear = false; + bool needs_depth_clear = false; + bool needs_stencil_clear = false; + if (mc) needs_color_clear = mc->GetResetNeedsColorClear(color_clear); + if (md) needs_depth_clear = md->GetResetNeedsDepthClear(depth_clear); + if (ms) needs_stencil_clear = ms->GetResetNeedsStencilClear(stencil_clear); + if (needs_color_clear && color_load != MTLLoadActionDontCare) color_load = MTLLoadActionClear; + if (needs_depth_clear && depth_load != MTLLoadActionDontCare) depth_load = MTLLoadActionClear; + if (needs_stencil_clear && stencil_load != MTLLoadActionDontCare) stencil_load = MTLLoadActionClear; + needs_new |= mc && color_load == MTLLoadActionClear; + needs_new |= md && depth_load == MTLLoadActionClear; + needs_new |= ms && stencil_load == MTLLoadActionClear; + + if (!needs_new) + { + if (m_current_render.name != (__bridge void*)name) + { + m_current_render.name = (__bridge void*)name; + [m_current_render.encoder setLabel:name]; + } + return; + } + + if (m_late_texture_upload_encoder) + { + [m_late_texture_upload_encoder endEncoding]; + m_late_texture_upload_encoder = 
nullptr; + } + + int idx = 0; + if (mc) idx |= 1; + if (md) idx |= 2; + if (ms) idx |= 4; + + MTLRenderPassDescriptor* desc = m_render_pass_desc[idx]; + if (mc) + { + mc->m_last_write = m_current_draw; + desc.colorAttachments[0].texture = mc->GetTexture(); + if (color_load == MTLLoadActionClear) + desc.colorAttachments[0].clearColor = MTLClearColorMake(color_clear.r, color_clear.g, color_clear.b, color_clear.a); + desc.colorAttachments[0].loadAction = color_load; + } + if (md) + { + md->m_last_write = m_current_draw; + desc.depthAttachment.texture = md->GetTexture(); + if (depth_load == MTLLoadActionClear) + desc.depthAttachment.clearDepth = depth_clear; + desc.depthAttachment.loadAction = depth_load; + } + if (ms) + { + ms->m_last_write = m_current_draw; + desc.stencilAttachment.texture = ms->GetTexture(); + if (stencil_load == MTLLoadActionClear) + desc.stencilAttachment.clearStencil = stencil_clear; + desc.stencilAttachment.loadAction = stencil_load; + } + + EndRenderPass(); + m_current_render.encoder = MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:desc]); + m_current_render.name = (__bridge void*)name; + [m_current_render.encoder setLabel:name]; + if (!m_dev.features.unified_memory) + [m_current_render.encoder waitForFence:m_draw_sync_fence + beforeStages:MTLRenderStageVertex]; + m_current_render.color_target = color; + m_current_render.depth_target = depth; + m_current_render.stencil_target = stencil; + pxAssertRel(m_current_render.encoder, "Failed to create render encoder!"); +} + +static constexpr MTLPixelFormat ConvertPixelFormat(GSTexture::Format format) +{ + switch (format) + { + case GSTexture::Format::PrimID: return MTLPixelFormatR32Float; + case GSTexture::Format::UInt32: return MTLPixelFormatR32Uint; + case GSTexture::Format::UInt16: return MTLPixelFormatR16Uint; + case GSTexture::Format::UNorm8: return MTLPixelFormatA8Unorm; + case GSTexture::Format::Color: return MTLPixelFormatRGBA8Unorm; + case GSTexture::Format::FloatColor: 
return MTLPixelFormatRGBA32Float; + case GSTexture::Format::DepthStencil: return MTLPixelFormatDepth32Float_Stencil8; + case GSTexture::Format::Invalid: return MTLPixelFormatInvalid; + case GSTexture::Format::BC1: return MTLPixelFormatBC1_RGBA; + case GSTexture::Format::BC2: return MTLPixelFormatBC2_RGBA; + case GSTexture::Format::BC3: return MTLPixelFormatBC3_RGBA; + case GSTexture::Format::BC7: return MTLPixelFormatBC7_RGBAUnorm; + } +} + +GSTexture* GSDeviceMTL::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) +{ @autoreleasepool { + MTLPixelFormat fmt = ConvertPixelFormat(format); + pxAssertRel(format != GSTexture::Format::Invalid, "Can't create surface of this format!"); + + MTLTextureDescriptor* desc = [MTLTextureDescriptor + texture2DDescriptorWithPixelFormat:fmt + width:std::max(1, std::min(width, m_dev.features.max_texsize)) + height:std::max(1, std::min(height, m_dev.features.max_texsize)) + mipmapped:levels > 1]; + + if (levels > 1) + [desc setMipmapLevelCount:levels]; + + [desc setStorageMode:MTLStorageModePrivate]; + switch (type) + { + case GSTexture::Type::Texture: + [desc setUsage:MTLTextureUsageShaderRead]; + break; + case GSTexture::Type::Offscreen: + [desc setUsage:MTLTextureUsageRenderTarget]; + break; + case GSTexture::Type::RenderTarget: + case GSTexture::Type::SparseRenderTarget: + if (m_dev.features.slow_color_compression) + [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget | MTLTextureUsagePixelFormatView]; // Force color compression off by including PixelFormatView + else + [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + break; + default: + [desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget]; + } + + MRCOwned> tex = MRCTransfer([m_dev.dev newTextureWithDescriptor:desc]); + if (tex) + { + GSTextureMTL* t = new GSTextureMTL(this, tex, type, format); + switch (type) + { + case GSTexture::Type::RenderTarget: + 
ClearRenderTarget(t, 0); + break; + case GSTexture::Type::DepthStencil: + ClearDepth(t); + break; + default: + break; + } + return t; + } + else + { + return nullptr; + } +}} + +void GSDeviceMTL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) +{ @autoreleasepool { + id cmdbuf = GetRenderCmdBuf(); + GSScopedDebugGroupMTL dbg(cmdbuf, @"DoMerge"); + + GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f); + bool feedback_write_2 = PMODE.EN2 && sTex[2] != nullptr && EXTBUF.FBIN == 1; + bool feedback_write_1 = PMODE.EN1 && sTex[2] != nullptr && EXTBUF.FBIN == 0; + bool feedback_write_2_but_blend_bg = feedback_write_2 && PMODE.SLBG == 1; + + ClearRenderTarget(dTex, c); + + vector_float4 cb_c = { c.r, c.g, c.b, c.a }; + GSMTLConvertPSUniform cb_yuv = {}; + cb_yuv.emoda = EXTBUF.EMODA; + cb_yuv.emodc = EXTBUF.EMODC; + + if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) + { + // 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output + // Note: value outside of dRect must contains the background color (c) + StretchRect(sTex[1], sRect[1], dTex, dRect[1], ShaderConvert::COPY); + } + + // Save 2nd output + if (feedback_write_2) // FIXME I'm not sure dRect[1] is always correct + DoStretchRect(dTex, full_r, sTex[2], dRect[1], m_convert_pipeline[static_cast(ShaderConvert::YUV)], true, LoadAction::DontCareIfFull, &cb_yuv, sizeof(cb_yuv)); + + if (feedback_write_2_but_blend_bg) + ClearRenderTarget(dTex, c); + + if (sTex[0]) + { + int idx = (PMODE.AMOD << 1) | PMODE.MMOD; + id pipeline = m_merge_pipeline[idx]; + + // 1st output is enabled. 
It must be blended + if (PMODE.MMOD == 1) + { + // Blend with a constant alpha + DoStretchRect(sTex[0], sRect[0], dTex, dRect[0], pipeline, true, LoadAction::Load, &cb_c, sizeof(cb_c)); + } + else + { + // Blend with 2 * input alpha + DoStretchRect(sTex[0], sRect[0], dTex, dRect[0], pipeline, true, LoadAction::Load, nullptr, 0); + } + } + + if (feedback_write_1) // FIXME I'm not sure dRect[0] is always correct + StretchRect(dTex, full_r, sTex[2], dRect[0], ShaderConvert::YUV); +}} + +void GSDeviceMTL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) +{ @autoreleasepool { + id cmdbuf = GetRenderCmdBuf(); + GSScopedDebugGroupMTL dbg(cmdbuf, @"DoInterlace"); + + GSVector4 s = GSVector4(dTex->GetSize()); + + GSVector4 sRect(0, 0, 1, 1); + GSVector4 dRect(0.f, yoffset, s.x, s.y + yoffset); + + GSMTLInterlacePSUniform cb = {}; + cb.ZrH = {0, 1.f / s.y}; + cb.hH = s.y / 2; + + DoStretchRect(sTex, sRect, dTex, dRect, m_interlace_pipeline[shader], linear, shader > 1 ? 
LoadAction::DontCareIfFull : LoadAction::Load, &cb, sizeof(cb)); +}} + +void GSDeviceMTL::DoFXAA(GSTexture* sTex, GSTexture* dTex) +{ + BeginRenderPass(@"FXAA", dTex, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare); + RenderCopy(sTex, m_fxaa_pipeline, GSVector4i(0, 0, dTex->GetSize().x, dTex->GetSize().y)); +} + +void GSDeviceMTL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) +{ + BeginRenderPass(@"ShadeBoost", dTex, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare); + [m_current_render.encoder setFragmentBytes:params + length:sizeof(float) * 4 + atIndex:GSMTLBufferIndexUniforms]; + RenderCopy(sTex, m_shadeboost_pipeline, GSVector4i(0, 0, dTex->GetSize().x, dTex->GetSize().y)); +} + +void GSDeviceMTL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) +{ + // TODO: Implement +} + +MRCOwned> GSDeviceMTL::LoadShader(NSString* name) +{ + NSError* err = nil; + MRCOwned> fn = MRCTransfer([m_dev.shaders newFunctionWithName:name constantValues:m_fn_constants error:&err]); + if (unlikely(err)) + { + NSString* msg = [NSString stringWithFormat:@"Failed to load shader %@: %@", name, [err localizedDescription]]; + Console.Error("%s", [msg UTF8String]); + throw GSRecoverableError(); + } + return fn; +} + +/* Labels desc with name, attaches the vertex and fragment functions, and asks the device for a render pipeline state. On failure the error is logged and GSRecoverableError is thrown. */ +MRCOwned> GSDeviceMTL::MakePipeline(MTLRenderPipelineDescriptor* desc, id vertex, id fragment, NSString* name) +{ + [desc setLabel:name]; + [desc setVertexFunction:vertex]; + [desc setFragmentFunction:fragment]; + NSError* err = nil; /* must start as nil: it is tested below, and the out-parameter is only guaranteed to be written when creation fails */ + MRCOwned> res = MRCTransfer([m_dev.dev newRenderPipelineStateWithDescriptor:desc error:&err]); + if (unlikely(!res || err)) /* a nil result is a failure even if no error object was handed back */ + { + NSString* msg = [NSString stringWithFormat:@"Failed to create pipeline %@: %@", name, [err localizedDescription]]; + Console.Error("%s", [msg UTF8String]); + throw GSRecoverableError(); + } + return res; +} + +static void applyAttribute(MTLVertexDescriptor* desc, NSUInteger idx, MTLVertexFormat fmt, NSUInteger offset, NSUInteger buffer_index) +{ + MTLVertexAttributeDescriptor* attrs = 
desc.attributes[idx]; + attrs.format = fmt; + attrs.offset = offset; + attrs.bufferIndex = buffer_index; +} + +static void setFnConstantB(MTLFunctionConstantValues* fc, bool value, GSMTLFnConstants constant) +{ + [fc setConstantValue:&value type:MTLDataTypeBool atIndex:constant]; +} + +static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GSMTLFnConstants constant) +{ + [fc setConstantValue:&value type:MTLDataTypeUInt atIndex:constant]; +} + +bool GSDeviceMTL::Create(HostDisplay* display) +{ @autoreleasepool { + if (!GSDevice::Create(display)) + return false; + + if (display->GetRenderAPI() != HostDisplay::RenderAPI::Metal) + return false; + + if (!m_display->HasRenderDevice() || !m_display->HasRenderSurface()) + return false; + m_dev = *static_cast(m_display->GetRenderDevice()); + m_queue = MRCRetain((__bridge id)m_display->GetRenderContext()); + MTLPixelFormat layer_px_fmt = [(__bridge CAMetalLayer*)m_display->GetRenderSurface() pixelFormat]; + + m_features.broken_point_sampler = [[m_dev.dev name] containsString:@"AMD"]; + m_features.geometry_shader = false; + m_features.image_load_store = m_dev.features.primid; + m_features.texture_barrier = true; + m_features.provoking_vertex_last = false; + m_features.point_expand = true; + m_features.line_expand = false; + m_features.prefer_new_textures = true; + m_features.dxt_textures = true; + m_features.bptc_textures = true; + m_features.framebuffer_fetch = m_dev.features.framebuffer_fetch; + m_features.dual_source_blend = true; + m_features.stencil_buffer = true; + + try + { + // Init metal stuff + m_draw_sync_fence = MRCTransfer([m_dev.dev newFence]); + + m_fn_constants = MRCTransfer([MTLFunctionConstantValues new]); + u8 upscale = std::max(1, theApp.GetConfigI("upscale_multiplier")); + vector_uchar2 upscale2 = vector2(upscale, upscale); + [m_fn_constants setConstantValue:&upscale2 type:MTLDataTypeUChar2 atIndex:GSMTLConstantIndex_SCALING_FACTOR]; + setFnConstantB(m_fn_constants, 
m_dev.features.framebuffer_fetch, GSMTLConstantIndex_FRAMEBUFFER_FETCH); + + /* Vertex layout for hardware draws: every attribute is read from the GSVertex buffer bound at GSMTLBufferIndexHWVertices, at the offset of the matching GSVertex field. */ m_hw_vertex = MRCTransfer([MTLVertexDescriptor new]); + [[[m_hw_vertex layouts] objectAtIndexedSubscript:GSMTLBufferIndexHWVertices] setStride:sizeof(GSVertex)]; + applyAttribute(m_hw_vertex, GSMTLAttributeIndexST, MTLVertexFormatFloat2, offsetof(GSVertex, ST), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexC, MTLVertexFormatUChar4, offsetof(GSVertex, RGBAQ.R), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexQ, MTLVertexFormatFloat, offsetof(GSVertex, RGBAQ.Q), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexXY, MTLVertexFormatUShort2, offsetof(GSVertex, XYZ.X), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexZ, MTLVertexFormatUInt, offsetof(GSVertex, XYZ.Z), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexUV, MTLVertexFormatUShort2, offsetof(GSVertex, UV), GSMTLBufferIndexHWVertices); + applyAttribute(m_hw_vertex, GSMTLAttributeIndexF, MTLVertexFormatUChar4Normalized, offsetof(GSVertex, FOG), GSMTLBufferIndexHWVertices); + + /* Pre-create every render pass descriptor with depth and stencil store actions set to Store. */ for (auto& desc : m_render_pass_desc) + { + desc = MRCTransfer([MTLRenderPassDescriptor new]); + [[desc depthAttachment] setStoreAction:MTLStoreActionStore]; + [[desc stencilAttachment] setStoreAction:MTLStoreActionStore]; + } + + /* One sampler state is built per possible SamplerSelector key; a single descriptor is reused and overwritten each iteration. */ // Init samplers + MTLSamplerDescriptor* sdesc = [[MTLSamplerDescriptor new] autorelease]; + const int anisotropy = theApp.GetConfigI("MaxAnisotropy"); + for (size_t i = 0; i < std::size(m_sampler_hw); i++) + { + GSHWDrawConfig::SamplerSelector sel; + sel.key = i; + /* minname and magname only feed the debug label; both start from the bilinear flag. */ const char* minname = sel.biln ? "Ln" : "Pt"; + const char* magname = minname; + sdesc.minFilter = sel.biln ? MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest; + sdesc.magFilter = sel.biln ? 
MTLSamplerMinMagFilterLinear : MTLSamplerMinMagFilterNearest; + /* sel.triln selects the mip filter. The two non-mipmapped modes keep the min filter chosen from sel.biln; the mipmapped modes override both the min filter and the name used in the label. */ switch (static_cast(sel.triln)) + { + case GS_MIN_FILTER::Nearest: + case GS_MIN_FILTER::Linear: + sdesc.mipFilter = MTLSamplerMipFilterNotMipmapped; + break; + case GS_MIN_FILTER::Nearest_Mipmap_Nearest: + minname = "PtPt"; + sdesc.minFilter = MTLSamplerMinMagFilterNearest; + sdesc.mipFilter = MTLSamplerMipFilterNearest; + break; + case GS_MIN_FILTER::Nearest_Mipmap_Linear: + minname = "PtLn"; + sdesc.minFilter = MTLSamplerMinMagFilterNearest; + sdesc.mipFilter = MTLSamplerMipFilterLinear; + break; + case GS_MIN_FILTER::Linear_Mipmap_Nearest: + minname = "LnPt"; + sdesc.minFilter = MTLSamplerMinMagFilterLinear; + sdesc.mipFilter = MTLSamplerMipFilterNearest; + break; + case GS_MIN_FILTER::Linear_Mipmap_Linear: + minname = "LnLn"; + sdesc.minFilter = MTLSamplerMinMagFilterLinear; + sdesc.mipFilter = MTLSamplerMipFilterLinear; + break; + } + + /* Address modes: repeat when tau/tav is set, clamp-to-edge otherwise. The V name is left out of the label when it equals the U setting. */ const char* taudesc = sel.tau ? "Repeat" : "Clamp"; + const char* tavdesc = sel.tav == sel.tau ? "" : sel.tav ? "Repeat" : "Clamp"; + sdesc.sAddressMode = sel.tau ? MTLSamplerAddressModeRepeat : MTLSamplerAddressModeClampToEdge; + sdesc.tAddressMode = sel.tav ? MTLSamplerAddressModeRepeat : MTLSamplerAddressModeClampToEdge; + sdesc.rAddressMode = MTLSamplerAddressModeClampToEdge; + + /* Anisotropy is only applied when both the configured value is non-zero and the selector asks for it; otherwise it is 1. */ sdesc.maxAnisotropy = anisotropy && sel.aniso ? anisotropy : 1; + sdesc.lodMaxClamp = sel.lodclamp ? 
0.25f : FLT_MAX; + + [sdesc setLabel:[NSString stringWithFormat:@"%s%s %s%s", taudesc, tavdesc, magname, minname]]; + m_sampler_hw[i] = MRCTransfer([m_dev.dev newSamplerStateWithDescriptor:sdesc]); + } + + /* Two fixed stencil states come first: both always pass the stencil test and keep the value on failure; "Stencil Write" uses the Replace operation on pass, "Stencil Zero" uses Zero. */ // Init depth stencil states + MTLDepthStencilDescriptor* dssdesc = [[MTLDepthStencilDescriptor new] autorelease]; + MTLStencilDescriptor* stencildesc = [[MTLStencilDescriptor new] autorelease]; + stencildesc.stencilCompareFunction = MTLCompareFunctionAlways; + stencildesc.depthFailureOperation = MTLStencilOperationKeep; + stencildesc.stencilFailureOperation = MTLStencilOperationKeep; + stencildesc.depthStencilPassOperation = MTLStencilOperationReplace; + dssdesc.frontFaceStencil = stencildesc; + dssdesc.backFaceStencil = stencildesc; + [dssdesc setLabel:@"Stencil Write"]; + m_dss_stencil_write = MRCTransfer([m_dev.dev newDepthStencilStateWithDescriptor:dssdesc]); + dssdesc.frontFaceStencil.depthStencilPassOperation = MTLStencilOperationZero; + dssdesc.backFaceStencil.depthStencilPassOperation = MTLStencilOperationZero; + [dssdesc setLabel:@"Stencil Zero"]; + m_dss_stencil_zero = MRCTransfer([m_dev.dev newDepthStencilStateWithDescriptor:dssdesc]); + /* For the per-selector states the stencil test becomes an Equal compare restricted to bit 0 for both reading and writing. */ stencildesc.stencilCompareFunction = MTLCompareFunctionEqual; + stencildesc.readMask = 1; + stencildesc.writeMask = 1; + /* One depth-stencil state per possible DepthStencilSelector key. */ for (size_t i = 0; i < std::size(m_dss_hw); i++) + { + GSHWDrawConfig::DepthStencilSelector sel; + sel.key = i; + /* With sel.date set the stencil descriptor is attached (Zero on pass for date_one, Keep otherwise); without it both face descriptors are reset to nil. */ if (sel.date) + { + if (sel.date_one) + stencildesc.depthStencilPassOperation = MTLStencilOperationZero; + else + stencildesc.depthStencilPassOperation = MTLStencilOperationKeep; + dssdesc.frontFaceStencil = stencildesc; + dssdesc.backFaceStencil = stencildesc; + } + else + { + dssdesc.frontFaceStencil = nil; + dssdesc.backFaceStencil = nil; + } + dssdesc.depthWriteEnabled = sel.zwe ? 
YES : NO; + /* Depth compare function for each sel.ztst value, and the matching name used in the debug label; the two tables must stay in the same order. */ static constexpr MTLCompareFunction ztst[] = + { + MTLCompareFunctionNever, + MTLCompareFunctionAlways, + MTLCompareFunctionGreaterEqual, + MTLCompareFunctionGreater, + }; + static constexpr const char* ztstname[] = + { + "DepthNever", + "DepthAlways", + "DepthGEq", + "DepthGreater", + }; + const char* datedesc = sel.date ? (sel.date_one ? " DATE_ONE" : " DATE") : ""; + const char* zwedesc = sel.zwe ? " ZWE" : ""; + dssdesc.depthCompareFunction = ztst[sel.ztst]; + [dssdesc setLabel:[NSString stringWithFormat:@"%s%s%s", ztstname[sel.ztst], zwedesc, datedesc]]; + m_dss_hw[i] = MRCTransfer([m_dev.dev newDepthStencilStateWithDescriptor:dssdesc]); + } + + /* One vs_main specialization per possible VSSelector key; the selector fields are written into the shared m_fn_constants before each load. */ // Init HW Vertex Shaders + for (size_t i = 0; i < std::size(m_hw_vs); i++) + { + VSSelector sel; + sel.key = i; + setFnConstantB(m_fn_constants, sel.fst, GSMTLConstantIndex_FST); + setFnConstantB(m_fn_constants, sel.iip, GSMTLConstantIndex_IIP); + setFnConstantB(m_fn_constants, sel.point_size, GSMTLConstantIndex_VS_POINT_SIZE); + m_hw_vs[i] = LoadShader(@"vs_main"); + } + + /* A single pipeline descriptor (pdesc) is reused for all fixed pipelines below; each group only changes the fields that differ, so the order of the assignments matters. */ // Init pipelines + auto vs_convert = LoadShader(@"vs_convert"); + auto fs_triangle = LoadShader(@"fs_triangle"); + auto ps_copy = LoadShader(@"ps_copy"); + auto pdesc = [[MTLRenderPipelineDescriptor new] autorelease]; + // FS Triangle Pipelines + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + m_hdr_resolve_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_mod256"), @"HDR Resolve"); + m_fxaa_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_fxaa"), @"fxaa"); + m_shadeboost_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_shadeboost"), @"shadeboost"); + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::FloatColor); + m_hdr_init_pipeline = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_copy_fs"), @"HDR Init"); + pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid; + pdesc.stencilAttachmentPixelFormat = 
MTLPixelFormatDepth32Float_Stencil8; + /* Stencil-only pipelines (color format is Invalid at this point): the two DATM variants and a clear pipeline that has no fragment function. */ m_datm_pipeline[0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_datm0"), @"datm0"); + m_datm_pipeline[1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_datm1"), @"datm1"); + m_stencil_clear_pipeline = MakePipeline(pdesc, fs_triangle, nil, @"Stencil Clear"); + /* PrimID init pipelines: first index 1 is built with a depth attachment format, first index 0 without; the second index selects the datm0/datm1 shader. */ pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::PrimID); + pdesc.stencilAttachmentPixelFormat = MTLPixelFormatInvalid; + pdesc.depthAttachmentPixelFormat = MTLPixelFormatDepth32Float_Stencil8; + m_primid_init_pipeline[1][0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_init_datm0"), @"PrimID DATM0 Clear"); + m_primid_init_pipeline[1][1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_init_datm1"), @"PrimID DATM1 Clear"); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + m_primid_init_pipeline[0][0] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_init_datm0"), @"PrimID DATM0 Clear"); + m_primid_init_pipeline[0][1] = MakePipeline(pdesc, fs_triangle, LoadShader(@"ps_primid_init_datm1"), @"PrimID DATM1 Clear"); + + /* From here on pdesc carries the ConvertShaderVertex layout (pos, texpos) read from buffer 0, for the pipelines that use vs_convert. */ pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + applyAttribute(pdesc.vertexDescriptor, 0, MTLVertexFormatFloat2, offsetof(ConvertShaderVertex, pos), 0); + applyAttribute(pdesc.vertexDescriptor, 1, MTLVertexFormatFloat2, offsetof(ConvertShaderVertex, texpos), 0); + pdesc.vertexDescriptor.layouts[0].stride = sizeof(ConvertShaderVertex); + + /* Interlace pipelines are named ps_interlace followed by their index. */ for (size_t i = 0; i < std::size(m_interlace_pipeline); i++) + { + NSString* name = [NSString stringWithFormat:@"ps_interlace%zu", i]; + m_interlace_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), name); + } + /* One pass over every ShaderConvert value; the switch either skips the value, builds a present pipeline, or sets the attachment formats for the convert pipeline built after the switch. */ for (size_t i = 0; i < std::size(m_convert_pipeline); i++) + { + ShaderConvert conv = static_cast(i); + NSString* name = [NSString stringWithCString:shaderName(conv) encoding:NSUTF8StringEncoding]; + switch (conv) + { + case ShaderConvert::Count: + case ShaderConvert::DATM_0: + case 
ShaderConvert::DATM_1: + case ShaderConvert::MOD_256: + /* no entry in m_convert_pipeline is built for these values */ continue; + /* These shaders target the layer pixel format and are stored in m_present_pipeline only; the label drops the first three characters of the shader name (presumably a "ps_" prefix - confirm against shaderName). */ case ShaderConvert::COPY: + case ShaderConvert::SCANLINE: + case ShaderConvert::DIAGONAL_FILTER: + case ShaderConvert::TRIANGULAR_FILTER: + case ShaderConvert::COMPLEX_FILTER: + pdesc.colorAttachments[0].pixelFormat = layer_px_fmt; + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + m_present_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), [NSString stringWithFormat:@"present_%s", shaderName(conv) + 3]); + continue; + /* The remaining groups only choose the attachment formats: UInt32 color, UInt16 color, depth-only, or regular Color. */ case ShaderConvert::FLOAT32_TO_32_BITS: + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::UInt32); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + break; + case ShaderConvert::FLOAT32_TO_16_BITS: + case ShaderConvert::RGBA8_TO_16_BITS: + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::UInt16); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + break; + case ShaderConvert::DEPTH_COPY: + case ShaderConvert::RGBA8_TO_FLOAT32: + case ShaderConvert::RGBA8_TO_FLOAT24: + case ShaderConvert::RGBA8_TO_FLOAT16: + case ShaderConvert::RGB5A1_TO_FLOAT16: + pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatInvalid; + pdesc.depthAttachmentPixelFormat = ConvertPixelFormat(GSTexture::Format::DepthStencil); + break; + case ShaderConvert::RGBA_TO_8I: // Yes really + case ShaderConvert::TRANSPARENCY_FILTER: + case ShaderConvert::FLOAT32_TO_RGBA8: + case ShaderConvert::FLOAT16_TO_RGB5A1: + case ShaderConvert::YUV: + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + break; + } + m_convert_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), name); + } + /* Plain copy pipelines: index 0 targets the Color format, index 1 the FloatColor format. */ pdesc.depthAttachmentPixelFormat = MTLPixelFormatInvalid; + pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::Color); + m_convert_pipeline_copy[0] = MakePipeline(pdesc, vs_convert, ps_copy, @"copy_color"); + 
pdesc.colorAttachments[0].pixelFormat = ConvertPixelFormat(GSTexture::Format::FloatColor); + m_convert_pipeline_copy[1] = MakePipeline(pdesc, vs_convert, ps_copy, @"copy_hdr"); + + /* Channel-masked copy pipelines: bit 0 of the index enables red, bit 1 green, bit 2 blue, bit 3 alpha; the label lists the enabled channels. */ pdesc.colorAttachments[0].pixelFormat = MTLPixelFormatRGBA8Unorm; + for (size_t i = 0; i < std::size(m_convert_pipeline_copy_mask); i++) + { + MTLColorWriteMask mask = MTLColorWriteMaskNone; + if (i & 1) mask |= MTLColorWriteMaskRed; + if (i & 2) mask |= MTLColorWriteMaskGreen; + if (i & 4) mask |= MTLColorWriteMaskBlue; + if (i & 8) mask |= MTLColorWriteMaskAlpha; + NSString* name = [NSString stringWithFormat:@"copy_%s%s%s%s", i & 1 ? "r" : "", i & 2 ? "g" : "", i & 4 ? "b" : "", i & 8 ? "a" : ""]; + pdesc.colorAttachments[0].writeMask = mask; + m_convert_pipeline_copy_mask[i] = MakePipeline(pdesc, vs_convert, ps_copy, name); + } + + /* Merge pipelines blend with source alpha / one-minus-source alpha. Bit 0 of the index (mmod) picks the ps_merge0 or ps_merge1 shader, bit 1 (amod) removes alpha from the write mask. */ pdesc.colorAttachments[0].blendingEnabled = YES; + pdesc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd; + pdesc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; + pdesc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha; + for (size_t i = 0; i < std::size(m_merge_pipeline); i++) + { + bool mmod = i & 1; + bool amod = i & 2; + /* mmod is a bool, which is passed to a variadic call as int, so the matching conversion is %d; the resulting names are unchanged. */ NSString* name = [NSString stringWithFormat:@"ps_merge%d", mmod]; + NSString* pipename = [NSString stringWithFormat:@"Merge%s%s", mmod ? " MMOD" : "", amod ? " AMOD" : ""]; + pdesc.colorAttachments[0].writeMask = amod ? 
MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue : MTLColorWriteMaskAll; + m_merge_pipeline[i] = MakePipeline(pdesc, vs_convert, LoadShader(name), pipename); + } + pdesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; + + /* ImGui pipelines use the ImDrawVert layout (pos, uv, col) from buffer 0 and render to the layer pixel format; blending is still enabled on pdesc from the merge setup. The a8 variant is only built when texture swizzle is unavailable. */ applyAttribute(pdesc.vertexDescriptor, 0, MTLVertexFormatFloat2, offsetof(ImDrawVert, pos), 0); + applyAttribute(pdesc.vertexDescriptor, 1, MTLVertexFormatFloat2, offsetof(ImDrawVert, uv), 0); + applyAttribute(pdesc.vertexDescriptor, 2, MTLVertexFormatUChar4Normalized, offsetof(ImDrawVert, col), 0); + pdesc.vertexDescriptor.layouts[0].stride = sizeof(ImDrawVert); + pdesc.colorAttachments[0].pixelFormat = layer_px_fmt; + m_imgui_pipeline = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui"), @"imgui"); + if (!m_dev.features.texture_swizzle) + m_imgui_pipeline_a8 = MakePipeline(pdesc, LoadShader(@"vs_imgui"), LoadShader(@"ps_imgui_a8"), @"imgui_a8"); + } + catch (GSRecoverableError&) + { + return false; + } + return true; +}} + +/* Requests a color clear to c on t; does nothing for a null texture. The clear is only requested on the texture here, not drawn. */ void GSDeviceMTL::ClearRenderTarget(GSTexture* t, const GSVector4& c) +{ + if (!t) return; + static_cast(t)->RequestColorClear(c); +} + +/* Unpacks c with GSVector4::rgba32, scales it by 1/255 and forwards to the GSVector4 overload. */ void GSDeviceMTL::ClearRenderTarget(GSTexture* t, uint32 c) +{ + GSVector4 color = GSVector4::rgba32(c) * (1.f / 255.f); + ClearRenderTarget(t, color); +} + +/* Requests a depth clear to 0 on t; does nothing for a null texture. */ void GSDeviceMTL::ClearDepth(GSTexture* t) +{ + if (!t) return; + static_cast(t)->RequestDepthClear(0); +} + +/* Requests a stencil clear to c on t; does nothing for a null texture. */ void GSDeviceMTL::ClearStencil(GSTexture* t, uint8 c) +{ + if (!t) return; + static_cast(t)->RequestStencilClear(c); +} + +/* Copies rect of src into m_texture_download_buf and waits for the GPU to finish. out_map receives the row pitch and a pointer to the buffer contents; the buffer is shared between calls and replaced when it is too small. Always returns true. */ bool GSDeviceMTL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) +{ @autoreleasepool { + ASSERT(src); + EndRenderPass(); + GSTextureMTL* msrc = static_cast(src); + out_map.pitch = msrc->GetCompressedBytesPerBlock() * rect.width(); + size_t size = out_map.pitch * rect.height(); + if ([m_texture_download_buf length] < size) + m_texture_download_buf = MRCTransfer([m_dev.dev newBufferWithLength:size 
options:MTLResourceStorageModeShared]); + pxAssertRel(m_texture_download_buf, "Failed to allocate download buffer (out of memory?)"); + + /* The command buffer is retained because FlushEncoders() is called before the wait below. */ MRCOwned> cmdbuf = MRCRetain(GetRenderCmdBuf()); + [cmdbuf pushDebugGroup:@"DownloadTexture"]; + id encoder = [cmdbuf blitCommandEncoder]; + [encoder copyFromTexture:msrc->GetTexture() + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(rect.x, rect.y, 0) + sourceSize:MTLSizeMake(rect.width(), rect.height(), 1) + toBuffer:m_texture_download_buf + destinationOffset:0 + destinationBytesPerRow:out_map.pitch + destinationBytesPerImage:size]; + [encoder endEncoding]; + [cmdbuf popDebugGroup]; + + FlushEncoders(); + [cmdbuf waitUntilCompleted]; + + out_map.bits = static_cast([m_texture_download_buf contents]); + g_perfmon.Put(GSPerfMon::Readbacks, 1); + return true; +}} + +/* Blits rectangle r of sTex to the origin of dTex with a blit encoder. Pending clears on dTex are flushed when r is smaller than dTex in either dimension and invalidated otherwise. */ void GSDeviceMTL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) +{ @autoreleasepool { + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + + GSTextureMTL* sT = static_cast(sTex); + GSTextureMTL* dT = static_cast(dTex); + + // Process clears + GSVector2i dsize = dTex->GetSize(); + if (r.width() < dsize.x || r.height() < dsize.y) + dT->FlushClears(); + else + dT->InvalidateClears(); + + EndRenderPass(); + + /* Record the current draw number as the last read of the source and the last write of the destination. */ sT->m_last_read = m_current_draw; + dT->m_last_write = m_current_draw; + + id cmdbuf = GetRenderCmdBuf(); + id encoder = [cmdbuf blitCommandEncoder]; + [encoder setLabel:@"CopyRect"]; + [encoder copyFromTexture:sT->GetTexture() + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(r.x, r.y, 0) + sourceSize:MTLSizeMake(r.width(), r.height(), 1) + toTexture:dT->GetTexture() + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + [encoder endEncoding]; +}} + +/* Draws sRect of sTex into dRect of dTex with the given pipeline and a linear or point sampler. load_action decides whether the existing contents of dTex are loaded; frag_uniform, when non-null and non-empty, is uploaded as fragment uniforms. */ void GSDeviceMTL::DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, id pipeline, bool linear, LoadAction load_action, void* frag_uniform, size_t frag_uniform_len) +{ + BeginScene(); + + FlushClears(sTex); + 
+ GSTextureMTL* sT = static_cast(sTex); + GSTextureMTL* dT = static_cast(dTex); + + GSVector2i ds = dT->GetSize(); + + /* dRect counts as covering the target when it reaches or exceeds all four edges of dTex; only then may DontCareIfFull skip loading the old contents. */ bool covers_target = static_cast(dRect.x) <= 0 + && static_cast(dRect.y) <= 0 + && static_cast(dRect.z) >= ds.x + && static_cast(dRect.w) >= ds.y; + bool dontcare = load_action == LoadAction::DontCare || (load_action == LoadAction::DontCareIfFull && covers_target); + MTLLoadAction action = dontcare ? MTLLoadActionDontCare : MTLLoadActionLoad; + + /* A DepthStencil destination is bound as the depth target, anything else as the color target. */ if (dT->GetFormat() == GSTexture::Format::DepthStencil) + BeginRenderPass(@"StretchRect", nullptr, MTLLoadActionDontCare, dT, action); + else + BeginRenderPass(@"StretchRect", dT, action, nullptr, MTLLoadActionDontCare); + + FlushDebugEntries(m_current_render.encoder); + MREClearScissor(); + /* Depth test always passes; depth writes are enabled only when the destination is a DepthStencil texture. */ DepthStencilSelector dsel; + dsel.ztst = ZTST_ALWAYS; + dsel.zwe = dT->GetFormat() == GSTexture::Format::DepthStencil; + MRESetDSS(dsel); + + MRESetPipeline(pipeline); + MRESetTexture(sT, GSMTLTextureIndexNonHW); + + if (frag_uniform && frag_uniform_len) + [m_current_render.encoder setFragmentBytes:frag_uniform length:frag_uniform_len atIndex:GSMTLBufferIndexUniforms]; + + MRESetSampler(linear ? 
SamplerSelector::Linear() : SamplerSelector::Point()); + + DrawStretchRect(sRect, dRect, ds); + + EndScene(); +} + +/* Maps dRect from coordinates relative to size ds into the -1..1 range (with Y flipped), pairs the corners with the corners of sRect and draws them as a 4-vertex triangle strip on the current encoder. */ void GSDeviceMTL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds) +{ + float left = dRect.x * 2 / ds.x - 1.0f; + float right = dRect.z * 2 / ds.x - 1.0f; + float top = 1.0f - dRect.y * 2 / ds.y; + float bottom = 1.0f - dRect.w * 2 / ds.y; + + ConvertShaderVertex vertices[] = + { + {{left, top}, {sRect.x, sRect.y}}, + {{right, top}, {sRect.z, sRect.y}}, + {{left, bottom}, {sRect.x, sRect.w}}, + {{right, bottom}, {sRect.z, sRect.w}} + }; + + [m_current_render.encoder setVertexBytes:vertices length:sizeof(vertices) atIndex:GSMTLBufferIndexVertices]; + + [m_current_render.encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip + vertexStart:0 + vertexCount:4]; + g_perfmon.Put(GSPerfMon::DrawCalls, 1); +} + +/* Sets the scissor to rect, binds pipeline and sTex, and draws three vertices with no vertex buffer. */ void GSDeviceMTL::RenderCopy(GSTexture* sTex, id pipeline, const GSVector4i& rect) +{ + // FS Triangle encoder uses vertex ID alone to make a FS triangle, which we then scissor to the desired rectangle + MRESetScissor(rect); + MRESetPipeline(pipeline); + MRESetTexture(sTex, GSMTLTextureIndexNonHW); + [m_current_render.encoder drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + g_perfmon.Put(GSPerfMon::DrawCalls, 1); +} + +/* Stretches sRect of sTex into dRect of dTex using the pipeline for shader. A null dTex instead draws on the current encoder with the present pipeline for shader, sized by the display window. */ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear) +{ @autoreleasepool { + if (!dTex) + { + // !dTex → "Present with the current draw encoder" + [m_current_render.encoder setRenderPipelineState:m_present_pipeline[static_cast(shader)]]; + [m_current_render.encoder setFragmentSamplerState:m_sampler_hw[linear ? 
SamplerSelector::Linear().key : SamplerSelector::Point().key] atIndex:0]; + [m_current_render.encoder setFragmentTexture:static_cast(sTex)->GetTexture() atIndex:0]; + DrawStretchRect(sRect, dRect, GSVector2i(m_display->GetWindowWidth(), m_display->GetWindowHeight())); + return; + } + + /* COPY picks its pipeline by destination format (index 0 for Color, 1 otherwise); every other shader uses its entry in m_convert_pipeline. */ id pipeline; + if (shader == ShaderConvert::COPY) + pipeline = m_convert_pipeline_copy[dTex->GetFormat() == GSTexture::Format::Color ? 0 : 1]; + else + pipeline = m_convert_pipeline[static_cast(shader)]; + + /* A missing pipeline raises an NSException rather than drawing with nil. */ if (!pipeline) + [NSException raise:@"StretchRect Missing Pipeline" format:@"No pipeline for %d", static_cast(shader)]; + + DoStretchRect(sTex, sRect, dTex, dRect, pipeline, linear, LoadAction::DontCareIfFull, nullptr, 0); +}} + +/* Channel-masked copy with point sampling. The four flags form a 4-bit index (red=1, green=2, blue=4, alpha=8) into m_convert_pipeline_copy_mask; the old destination contents may only be discarded when all four channels are written. */ void GSDeviceMTL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) +{ @autoreleasepool { + int sel = 0; + if (red) sel |= 1; + if (green) sel |= 2; + if (blue) sel |= 4; + if (alpha) sel |= 8; + + id pipeline = m_convert_pipeline_copy_mask[sel]; + + DoStretchRect(sTex, sRect, dTex, dRect, pipeline, false, sel == 15 ? 
LoadAction::DontCareIfFull : LoadAction::Load, nullptr, 0); +}} + +/* Flushes pending clears on tex; does nothing for a null texture. */ void GSDeviceMTL::FlushClears(GSTexture* tex) +{ + if (tex) + static_cast(tex)->FlushClears(); +} + +// MARK: - MainRenderEncoder Operations + +/* Maps a renderer-independent blend factor to the Metal one. Every enumerator listed has its own Metal counterpart; there is no default case, so a value outside the list falls off the end of the function. */ static MTLBlendFactor ConvertBlendFactor(GSDevice::BlendFactor generic) +{ + switch (generic) + { + case GSDevice::SRC_COLOR: return MTLBlendFactorSourceColor; + case GSDevice::INV_SRC_COLOR: return MTLBlendFactorOneMinusSourceColor; + case GSDevice::DST_COLOR: return MTLBlendFactorDestinationColor; + /* inverse of the destination color, not of the constant blend color (that is INV_CONST_COLOR below) */ case GSDevice::INV_DST_COLOR: return MTLBlendFactorOneMinusDestinationColor; + case GSDevice::SRC1_COLOR: return MTLBlendFactorSource1Color; + case GSDevice::INV_SRC1_COLOR: return MTLBlendFactorOneMinusSource1Color; + case GSDevice::SRC_ALPHA: return MTLBlendFactorSourceAlpha; + case GSDevice::INV_SRC_ALPHA: return MTLBlendFactorOneMinusSourceAlpha; + case GSDevice::DST_ALPHA: return MTLBlendFactorDestinationAlpha; + case GSDevice::INV_DST_ALPHA: return MTLBlendFactorOneMinusDestinationAlpha; + case GSDevice::SRC1_ALPHA: return MTLBlendFactorSource1Alpha; + case GSDevice::INV_SRC1_ALPHA: return MTLBlendFactorOneMinusSource1Alpha; + case GSDevice::CONST_COLOR: return MTLBlendFactorBlendColor; + case GSDevice::INV_CONST_COLOR: return MTLBlendFactorOneMinusBlendColor; + case GSDevice::CONST_ONE: return MTLBlendFactorOne; + case GSDevice::CONST_ZERO: return MTLBlendFactorZero; + } +} + +/* Maps a renderer-independent blend operation (add, subtract, reverse subtract) to the Metal one. */ static MTLBlendOperation ConvertBlendOp(GSDevice::BlendOp generic) +{ + switch (generic) + { + case GSDevice::OP_ADD: return MTLBlendOperationAdd; + case GSDevice::OP_SUBTRACT: return MTLBlendOperationSubtract; + case GSDevice::OP_REV_SUBTRACT: return MTLBlendOperationReverseSubtract; + } +} + +/* Binds the hardware-draw pipeline for the given vertex selector, pixel selector, blend state and color mask on the current encoder, building and caching the pixel shader and pipeline on first use. */ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDrawConfig::PSSelector pssel, GSHWDrawConfig::BlendState blend, GSHWDrawConfig::ColorMaskSelector cms) +{ + PipelineSelectorExtrasMTL extras(blend, m_current_render.color_target, cms, m_current_render.depth_target, 
m_current_render.stencil_target); + PipelineSelectorMTL fullsel(vssel, pssel, extras); + /* Nothing to do when the encoder already has this exact selector bound. */ if (m_current_render.has.pipeline_sel && fullsel == m_current_render.pipeline_sel) + return; + m_current_render.pipeline_sel = fullsel; + m_current_render.has.pipeline_sel = true; + /* Reuse a previously built pipeline for this selector when one exists. */ auto idx = m_hw_pipeline.find(fullsel); + if (idx != m_hw_pipeline.end()) + { + [m_current_render.encoder setRenderPipelineState:idx->second]; + return; + } + + /* date values 1 and 2 select the min-blend setup applied to the color attachment further down. */ bool primid_tracking_init = pssel.date == 1 || pssel.date == 2; + + /* Only fst, iip and point_size take part in choosing the pre-built vertex shader. */ VSSelector vssel_mtl; + vssel_mtl.fst = vssel.fst; + vssel_mtl.iip = vssel.iip; + vssel_mtl.point_size = vssel.point_size; + id vs = m_hw_vs[vssel_mtl.key]; + + /* Pixel shaders are cached per pssel; on a miss every selector field is written into m_fn_constants below and ps_main is specialized with them. */ id ps; + auto idx2 = m_hw_ps.find(pssel); + if (idx2 != m_hw_ps.end()) + { + ps = idx2->second; + } + else + { + setFnConstantB(m_fn_constants, pssel.fst, GSMTLConstantIndex_FST); + setFnConstantB(m_fn_constants, pssel.iip, GSMTLConstantIndex_IIP); + setFnConstantI(m_fn_constants, pssel.aem_fmt, GSMTLConstantIndex_PS_AEM_FMT); + setFnConstantI(m_fn_constants, pssel.pal_fmt, GSMTLConstantIndex_PS_PAL_FMT); + setFnConstantI(m_fn_constants, pssel.dfmt, GSMTLConstantIndex_PS_DFMT); + setFnConstantI(m_fn_constants, pssel.depth_fmt, GSMTLConstantIndex_PS_DEPTH_FMT); + setFnConstantB(m_fn_constants, pssel.aem, GSMTLConstantIndex_PS_AEM); + setFnConstantB(m_fn_constants, pssel.fba, GSMTLConstantIndex_PS_FBA); + setFnConstantB(m_fn_constants, pssel.fog, GSMTLConstantIndex_PS_FOG); + setFnConstantI(m_fn_constants, pssel.date, GSMTLConstantIndex_PS_DATE); + setFnConstantI(m_fn_constants, pssel.atst, GSMTLConstantIndex_PS_ATST); + setFnConstantI(m_fn_constants, pssel.tfx, GSMTLConstantIndex_PS_TFX); + setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC); + setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS); + setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT); + setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); + setFnConstantB(m_fn_constants, pssel.shuffle, 
GSMTLConstantIndex_PS_SHUFFLE); + /* Each call below copies one pixel-selector field into m_fn_constants under its matching PS_* index: the B variant stores it as a bool, the I variant as an unsigned int. */ setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); + setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG); + setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK); + setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A); + setFnConstantI(m_fn_constants, pssel.blend_b, GSMTLConstantIndex_PS_BLEND_B); + setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C); + setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D); + setFnConstantI(m_fn_constants, pssel.clr_hw, GSMTLConstantIndex_PS_CLR_HW); + setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR); + setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP); + setFnConstantB(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX); + setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE); + setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR); + setFnConstantB(m_fn_constants, pssel.no_color1, GSMTLConstantIndex_PS_NO_COLOR1); + // no_ablend ignored for now (No Metal driver has had DSB so broken that it's needed to be disabled, though Intel's was pretty close) + setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA); + setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL); + setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER); + setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP); + setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK); + setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE); + setFnConstantB(m_fn_constants, pssel.tales_of_abyss_hle, GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE); + setFnConstantB(m_fn_constants, pssel.tex_is_fb, GSMTLConstantIndex_PS_TEX_IS_FB); + 
setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD); + setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD); + setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER); + setFnConstantB(m_fn_constants, pssel.invalid_tex0, GSMTLConstantIndex_PS_INVALID_TEX0); + setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK); + /* Specialize ps_main with the constants set above, keep a plain reference for this call and move the owning one into the cache. */ auto newps = LoadShader(@"ps_main"); + ps = newps; + m_hw_ps.insert(std::make_pair(pssel, std::move(newps))); + } + + /* Fresh descriptor for the new pipeline: hardware vertex layout, color format taken from extras.rt, and the combined depth/stencil format wherever extras reports a depth or stencil target. */ MRCOwned pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [pdesc setVertexDescriptor:m_hw_vertex]; + MTLRenderPipelineColorAttachmentDescriptor* color = [[pdesc colorAttachments] objectAtIndexedSubscript:0]; + color.pixelFormat = ConvertPixelFormat(extras.rt); + [pdesc setDepthAttachmentPixelFormat:extras.has_depth ? MTLPixelFormatDepth32Float_Stencil8 : MTLPixelFormatInvalid]; + [pdesc setStencilAttachmentPixelFormat:extras.has_stencil ? 
MTLPixelFormatDepth32Float_Stencil8 : MTLPixelFormatInvalid]; + color.writeMask = extras.writemask; + /* PrimID tracking init uses a Min blend with both factors One; otherwise blending follows extras when it is enabled there. Only the RGB blend fields are set in either branch. */ if (primid_tracking_init) + { + color.blendingEnabled = YES; + color.rgbBlendOperation = MTLBlendOperationMin; + color.sourceRGBBlendFactor = MTLBlendFactorOne; + color.destinationRGBBlendFactor = MTLBlendFactorOne; + } + else if (extras.blend_enable) + { + color.blendingEnabled = YES; + color.rgbBlendOperation = ConvertBlendOp(extras.blend_op); + color.sourceRGBBlendFactor = ConvertBlendFactor(extras.src_factor); + color.destinationRGBBlendFactor = ConvertBlendFactor(extras.dst_factor); + } + /* The label encodes the selector keys in hex. */ NSString* pname = [NSString stringWithFormat:@"HW Render %x.%x.%llx.%x", vssel_mtl.key, pssel.key_hi, pssel.key_lo, extras.fullkey()]; + auto pipeline = MakePipeline(pdesc, vs, ps, pname); + + [m_current_render.encoder setRenderPipelineState:pipeline]; + m_hw_pipeline.insert(std::make_pair(fullsel, std::move(pipeline))); +} + +/* Binds the pre-built depth-stencil state for sel, unless there is no depth target or the same selector key is already bound. */ void GSDeviceMTL::MRESetDSS(DepthStencilSelector sel) +{ + if (!m_current_render.depth_target || m_current_render.depth_sel.key == sel.key) + return; + [m_current_render.encoder setDepthStencilState:m_dss_hw[sel.key]]; + m_current_render.depth_sel = sel; +} + +/* Binds an arbitrary depth-stencil state and sets the cached selector key to -1 so the selector overload does not treat its next request as already bound. */ void GSDeviceMTL::MRESetDSS(id dss) +{ + [m_current_render.encoder setDepthStencilState:dss]; + m_current_render.depth_sel.key = -1; +} + +/* Binds the pre-built sampler for sel at fragment sampler index 0, skipping the call when the same key is already bound. */ void GSDeviceMTL::MRESetSampler(SamplerSelector sel) +{ + if (m_current_render.has.sampler && m_current_render.sampler_sel.key == sel.key) + return; + [m_current_render.encoder setFragmentSamplerState:m_sampler_hw[sel.key] atIndex:0]; + m_current_render.sampler_sel = sel; + m_current_render.has.sampler = true; +} + +/* Issues a render-target memory barrier between fragment stages on enc where the availability check passes, and falls back to textureBarrier otherwise. */ static void textureBarrier(id enc) +{ + if (@available(macOS 10.14, *)) { + [enc memoryBarrierWithScope:MTLBarrierScopeRenderTargets + afterStages:MTLRenderStageFragment + beforeStages:MTLRenderStageFragment]; + } else { + [enc textureBarrier]; + } +} + +/* Binds tex at fragment texture index pos and records the current draw as its last read; skipped when tex is already cached for pos, and a null tex only updates the cache. */ void GSDeviceMTL::MRESetTexture(GSTexture* tex, int pos) +{ + if (tex == 
m_current_render.tex[pos]) + return; + m_current_render.tex[pos] = tex; + if (GSTextureMTL* mtex = static_cast(tex)) + { + [m_current_render.encoder setFragmentTexture:mtex->GetTexture() atIndex:pos]; + mtex->m_last_read = m_current_draw; + } +} + +/* Binds buffer at the hardware vertex buffer index with the given offset; when the same buffer is already bound only the offset is updated. */ void GSDeviceMTL::MRESetVertices(id buffer, size_t offset) +{ + if (m_current_render.vertex_buffer != (__bridge void*)buffer) + { + m_current_render.vertex_buffer = (__bridge void*)buffer; + [m_current_render.encoder setVertexBuffer:buffer offset:offset atIndex:GSMTLBufferIndexHWVertices]; + } + else + { + [m_current_render.encoder setVertexBufferOffset:offset atIndex:GSMTLBufferIndexHWVertices]; + } +} + +/* Applies scissor to the current encoder and caches it; skipped when an identical scissor is already set. */ void GSDeviceMTL::MRESetScissor(const GSVector4i& scissor) +{ + if (m_current_render.has.scissor && (m_current_render.scissor == scissor).alltrue()) + return; + MTLScissorRect r; + r.x = scissor.x; + r.y = scissor.y; + r.width = scissor.width(); + r.height = scissor.height(); + [m_current_render.encoder setScissorRect:r]; + m_current_render.scissor = scissor; + m_current_render.has.scissor = true; +} + +/* Drops any cached scissor and resets the scissor rectangle to start at the origin with the component-wise maximum size of the bound color, depth and stencil targets. Does nothing when no scissor is cached. */ void GSDeviceMTL::MREClearScissor() +{ + if (!m_current_render.has.scissor) + return; + m_current_render.has.scissor = false; + GSVector4i size = GSVector4i(0); + if (m_current_render.color_target) size = size.max_u32(GSVector4i(m_current_render.color_target ->GetSize())); + if (m_current_render.depth_target) size = size.max_u32(GSVector4i(m_current_render.depth_target ->GetSize())); + if (m_current_render.stencil_target) size = size.max_u32(GSVector4i(m_current_render.stencil_target->GetSize())); + MTLScissorRect r; + r.x = 0; + r.y = 0; + r.width = size.x; + r.height = size.y; + [m_current_render.encoder setScissorRect:r]; +} + +/* Uploads the vertex-stage constant buffer at the hardware uniform index, skipping the upload when an equal buffer is already cached. */ void GSDeviceMTL::MRESetCB(const GSHWDrawConfig::VSConstantBuffer& cb) +{ + if (m_current_render.has.cb_vs && m_current_render.cb_vs == cb) + return; + [m_current_render.encoder setVertexBytes:&cb length:sizeof(cb) atIndex:GSMTLBufferIndexHWUniforms]; + m_current_render.has.cb_vs = true; + 
m_current_render.cb_vs = cb; +} + +/* Uploads the fragment-stage constant buffer at the hardware uniform index, skipping the upload when an equal buffer is already cached. */ void GSDeviceMTL::MRESetCB(const GSHWDrawConfig::PSConstantBuffer& cb) +{ + if (m_current_render.has.cb_ps && m_current_render.cb_ps == cb) + return; + [m_current_render.encoder setFragmentBytes:&cb length:sizeof(cb) atIndex:GSMTLBufferIndexHWUniforms]; + m_current_render.has.cb_ps = true; + m_current_render.cb_ps = cb; +} + +/* Sets all four blend-color components to color / 128 (so an input of 128 becomes 1.0), skipping the call when the same value is already cached. */ void GSDeviceMTL::MRESetBlendColor(u8 color) +{ + if (m_current_render.has.blend_color && m_current_render.blend_color == color) + return; + float fc = static_cast(color) / 128.f; + [m_current_render.encoder setBlendColorRed:fc green:fc blue:fc alpha:fc]; + m_current_render.has.blend_color = true; + m_current_render.blend_color = color; +} + +/* Binds an arbitrary pipeline and marks the cached hardware pipeline selector as unset, so the next MRESetHWPipelineState call binds again. */ void GSDeviceMTL::MRESetPipeline(id pipe) +{ + [m_current_render.encoder setRenderPipelineState:pipe]; + m_current_render.has.pipeline_sel = false; +} + +// MARK: - HW Render + +/* Compile-time checks that the C++ constant buffer structs and the shader-side uniform structs have the same total size and the same offset for each paired field. */ // Metal can't import GSDevice.h, but we should make sure the structs are at least compatible +static_assert(sizeof(GSHWDrawConfig::VSConstantBuffer) == sizeof(GSMTLMainVSUniform)); +static_assert(sizeof(GSHWDrawConfig::PSConstantBuffer) == sizeof(GSMTLMainPSUniform)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, vertex_scale) == offsetof(GSMTLMainVSUniform, vertex_scale)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, vertex_offset) == offsetof(GSMTLMainVSUniform, vertex_offset)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, texture_scale) == offsetof(GSMTLMainVSUniform, texture_scale)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, texture_offset) == offsetof(GSMTLMainVSUniform, texture_offset)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, point_size) == offsetof(GSMTLMainVSUniform, point_size)); +static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, max_depth) == offsetof(GSMTLMainVSUniform, max_depth)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FogColor_AREF.x) == offsetof(GSMTLMainPSUniform, fog_color)); 
+static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FogColor_AREF.a) == offsetof(GSMTLMainPSUniform, aref)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, WH) == offsetof(GSMTLMainPSUniform, wh)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.x) == offsetof(GSMTLMainPSUniform, ta)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.z) == offsetof(GSMTLMainPSUniform, max_depth)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TA_MaxDepth_Af.w) == offsetof(GSMTLMainPSUniform, alpha_fix)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MskFix) == offsetof(GSMTLMainPSUniform, uv_msk_fix)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, FbMask) == offsetof(GSMTLMainPSUniform, fbmask)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, HalfTexel) == offsetof(GSMTLMainPSUniform, half_texel)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, MinMax) == offsetof(GSMTLMainPSUniform, uv_min_max)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, ChannelShuffle) == offsetof(GSMTLMainPSUniform, channel_shuffle)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, TCOffsetHack) == offsetof(GSMTLMainPSUniform, tc_offset)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, STScale) == offsetof(GSMTLMainPSUniform, st_scale)); +static_assert(offsetof(GSHWDrawConfig::PSConstantBuffer, DitherMatrix) == offsetof(GSMTLMainPSUniform, dither_matrix)); + +void GSDeviceMTL::SetupDestinationAlpha(GSTexture* rt, GSTexture* ds, const GSVector4i& r, bool datm) +{ + BeginScene(); + FlushClears(rt); + BeginRenderPass(@"Destination Alpha Setup", nullptr, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare, ds, MTLLoadActionDontCare); + [m_current_render.encoder setStencilReferenceValue:1]; + MRESetDSS(m_dss_stencil_zero); + RenderCopy(nullptr, m_stencil_clear_pipeline, r); + MRESetDSS(m_dss_stencil_write); + RenderCopy(rt, m_datm_pipeline[datm], r); + 
EndScene(); +} + +static id getTexture(GSTexture* tex) +{ + return tex ? static_cast(tex)->GetTexture() : nil; +} + +void GSDeviceMTL::MREInitHWDraw(GSHWDrawConfig& config, const Map& verts) +{ + MRESetScissor(config.scissor); + MRESetTexture(config.tex, GSMTLTextureIndexTex); + MRESetTexture(config.pal, GSMTLTextureIndexPalette); + MRESetSampler(config.sampler); + MRESetCB(config.cb_vs); + MRESetCB(config.cb_ps); + MRESetVertices(verts.gpu_buffer, verts.gpu_offset); +} + +void GSDeviceMTL::RenderHW(GSHWDrawConfig& config) +{ @autoreleasepool { + if (config.topology == GSHWDrawConfig::Topology::Point) + config.vs.point_size = 1; // M1 requires point size output on *all* points + + if (config.tex && config.ds == config.tex) + EndRenderPass(); // Barrier + + size_t vertsize = config.nverts * sizeof(*config.verts); + size_t idxsize = config.nindices * sizeof(*config.indices); + Map allocation = Allocate(m_vertex_upload_buf, vertsize + idxsize); + memcpy(allocation.cpu_buffer, config.verts, vertsize); + memcpy(static_cast(allocation.cpu_buffer) + vertsize, config.indices, idxsize); + + FlushClears(config.tex); + FlushClears(config.pal); + + GSTexture* stencil = nullptr; + GSTexture* primid_tex = nullptr; + GSTexture* rt = config.rt; + switch (config.destination_alpha) + { + case GSHWDrawConfig::DestinationAlphaMode::Off: + case GSHWDrawConfig::DestinationAlphaMode::Full: + break; // No setup + case GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking: + { + FlushClears(config.rt); + GSVector2i size = config.rt->GetSize(); + primid_tex = CreateRenderTarget(size.x, size.y, GSTexture::Format::PrimID); + DepthStencilSelector dsel = config.depth; + dsel.zwe = 0; + GSTexture* depth = dsel.key == DepthStencilSelector::NoDepth().key ? 
nullptr : config.ds; + BeginRenderPass(@"PrimID Destination Alpha Init", primid_tex, MTLLoadActionDontCare, depth, MTLLoadActionLoad); + RenderCopy(config.rt, m_primid_init_pipeline[static_cast(depth)][config.datm], config.drawarea); + MRESetDSS(dsel); + ASSERT(config.ps.date == 1 || config.ps.date == 2); + if (config.ps.tex_is_fb) + MRESetTexture(config.rt, GSMTLTextureIndexRenderTarget); + config.require_one_barrier = false; // Ending render pass is our barrier + ASSERT(config.require_full_barrier == false && config.drawlist == nullptr); + MRESetHWPipelineState(config.vs, config.ps, {}, {}); + MREInitHWDraw(config, allocation); + SendHWDraw(config, m_current_render.encoder, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + config.ps.date = 3; + break; + } + case GSHWDrawConfig::DestinationAlphaMode::StencilOne: + BeginRenderPass(@"Destination Alpha Stencil Clear", nullptr, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare, config.ds, MTLLoadActionDontCare); + [m_current_render.encoder setStencilReferenceValue:1]; + MRESetDSS(m_dss_stencil_write); + RenderCopy(nullptr, m_stencil_clear_pipeline, config.drawarea); + stencil = config.ds; + break; + case GSHWDrawConfig::DestinationAlphaMode::Stencil: + SetupDestinationAlpha(config.rt, config.ds, config.drawarea, config.datm); + stencil = config.ds; + break; + } + + BeginScene(); + GSTexture* hdr_rt = nullptr; + if (config.ps.hdr) + { + GSVector2i size = config.rt->GetSize(); + hdr_rt = CreateRenderTarget(size.x, size.y, GSTexture::Format::FloatColor); + BeginRenderPass(@"HDR Init", hdr_rt, MTLLoadActionDontCare, nullptr, MTLLoadActionDontCare); + RenderCopy(config.rt, m_hdr_init_pipeline, config.drawarea); + rt = hdr_rt; + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + } + + // Try to reduce render pass restarts + if (!stencil && config.depth.key == DepthStencilSelector::NoDepth().key && (m_current_render.color_target != rt || m_current_render.depth_target != config.ds)) + config.ds = nullptr; + if 
(!config.ds && m_current_render.color_target == rt && stencil == m_current_render.stencil_target && m_current_render.depth_target != config.tex) + config.ds = m_current_render.depth_target; + + BeginRenderPass(@"RenderHW", rt, MTLLoadActionLoad, config.ds, MTLLoadActionLoad, stencil, MTLLoadActionLoad); + id mtlenc = m_current_render.encoder; + FlushDebugEntries(mtlenc); + MREInitHWDraw(config, allocation); + if (config.require_one_barrier || config.require_full_barrier) + MRESetTexture(config.rt, GSMTLTextureIndexRenderTarget); + if (primid_tex) + MRESetTexture(primid_tex, GSMTLTextureIndexPrimIDs); + if (config.blend.constant_enable) + MRESetBlendColor(config.blend.constant); + MRESetHWPipelineState(config.vs, config.ps, config.blend, config.colormask); + MRESetDSS(config.depth); + + SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + + if (config.alpha_second_pass.enable) + { + if (config.alpha_second_pass.ps_aref != config.cb_ps.FogColor_AREF.a) + { + config.cb_ps.FogColor_AREF.a = config.alpha_second_pass.ps_aref; + MRESetCB(config.cb_ps); + } + MRESetHWPipelineState(config.vs, config.alpha_second_pass.ps, config.blend, config.alpha_second_pass.colormask); + MRESetDSS(config.alpha_second_pass.depth); + SendHWDraw(config, mtlenc, allocation.gpu_buffer, allocation.gpu_offset + vertsize); + } + + if (hdr_rt) + { + BeginRenderPass(@"HDR Resolve", config.rt, MTLLoadActionLoad, nullptr, MTLLoadActionDontCare); + RenderCopy(hdr_rt, m_hdr_resolve_pipeline, config.drawarea); + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + + Recycle(hdr_rt); + } + + if (primid_tex) + Recycle(primid_tex); +}} + +void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id enc, id buffer, size_t off) +{ + MTLPrimitiveType topology; + switch (config.topology) + { + case GSHWDrawConfig::Topology::Point: topology = MTLPrimitiveTypePoint; break; + case GSHWDrawConfig::Topology::Line: topology = MTLPrimitiveTypeLine; break; + case 
GSHWDrawConfig::Topology::Triangle: topology = MTLPrimitiveTypeTriangle; break; + } + + if (config.drawlist) + { + [enc pushDebugGroup:[NSString stringWithFormat:@"Full barrier split draw (%d sprites in %zu groups)", config.nindices / config.indices_per_prim, config.drawlist->size()]]; // %zu: drawlist->size() is a size_t, same as the signpost below +#if defined(_DEBUG) + // Check how draw call is split. + std::map frequency; + for (const auto& it : *config.drawlist) + ++frequency[it]; + + std::string message; + for (const auto& it : frequency) + message += " " + std::to_string(it.first) + "(" + std::to_string(it.second) + ")"; + + [enc insertDebugSignpost:[NSString stringWithFormat:@"Split single draw (%d sprites) into %zu draws: consecutive draws(frequency):%s", + config.nindices / config.indices_per_prim, config.drawlist->size(), message.c_str()]]; +#endif + + for (size_t count = 0, p = 0, n = 0; n < config.drawlist->size(); p += count, ++n) + { + count = (*config.drawlist)[n] * config.indices_per_prim; + textureBarrier(enc); + [enc drawIndexedPrimitives:topology + indexCount:count + indexType:MTLIndexTypeUInt32 + indexBuffer:buffer + indexBufferOffset:off + p * sizeof(*config.indices)]; + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + } + [enc popDebugGroup]; + } + else if (config.require_full_barrier) + { + [enc pushDebugGroup:[NSString stringWithFormat:@"Full barrier split draw (%d prims)", config.nindices / config.indices_per_prim]]; + for (size_t p = 0; p < config.nindices; p += config.indices_per_prim) + { + textureBarrier(enc); + [enc drawIndexedPrimitives:topology + indexCount:config.indices_per_prim + indexType:MTLIndexTypeUInt32 + indexBuffer:buffer + indexBufferOffset:off + p * sizeof(*config.indices)]; + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + } + [enc popDebugGroup]; + } + else if (config.require_one_barrier) + { + // One barrier needed + textureBarrier(enc); + [enc drawIndexedPrimitives:topology + indexCount:config.nindices + indexType:MTLIndexTypeUInt32 + indexBuffer:buffer + indexBufferOffset:off]; + 
g_perfmon.Put(GSPerfMon::DrawCalls, 1); + } + else + { + // No barriers needed + [enc drawIndexedPrimitives:topology + indexCount:config.nindices + indexType:MTLIndexTypeUInt32 + indexBuffer:buffer + indexBufferOffset:off]; + g_perfmon.Put(GSPerfMon::DrawCalls, 1); + } +} + +// tbh I'm not a fan of the current debug groups +// not much useful information and makes things harder to find +// good to turn on if you're debugging tc stuff though +#ifndef MTL_ENABLE_DEBUG + #define MTL_ENABLE_DEBUG 0 +#endif + +void GSDeviceMTL::PushDebugGroup(const char* fmt, ...) +{ +#if MTL_ENABLE_DEBUG + va_list va; + va_start(va, fmt); + MRCOwned nsfmt = MRCTransfer([[NSString alloc] initWithUTF8String:fmt]); + m_debug_entries.emplace_back(DebugEntry::Push, MRCTransfer([[NSString alloc] initWithFormat:nsfmt arguments:va])); + va_end(va); +#endif +} + +void GSDeviceMTL::PopDebugGroup() +{ +#if MTL_ENABLE_DEBUG + m_debug_entries.emplace_back(DebugEntry::Pop, nullptr); +#endif +} + +void GSDeviceMTL::InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...) 
+{ +#if MTL_ENABLE_DEBUG + va_list va; + va_start(va, fmt); + MRCOwned nsfmt = MRCTransfer([[NSString alloc] initWithUTF8String:fmt]); + m_debug_entries.emplace_back(DebugEntry::Insert, MRCTransfer([[NSString alloc] initWithFormat:nsfmt arguments:va])); + va_end(va); +#endif +} + +void GSDeviceMTL::ProcessDebugEntry(id enc, const DebugEntry& entry) +{ + switch (entry.op) + { + case DebugEntry::Push: + [enc pushDebugGroup:entry.str]; + m_debug_group_level++; + break; + case DebugEntry::Pop: + [enc popDebugGroup]; + if (m_debug_group_level > 0) + m_debug_group_level--; + break; + case DebugEntry::Insert: + [enc insertDebugSignpost:entry.str]; + break; + } +} + +void GSDeviceMTL::FlushDebugEntries(id enc) +{ +#if MTL_ENABLE_DEBUG + if (!m_debug_entries.empty()) + { + for (const DebugEntry& entry : m_debug_entries) + { + ProcessDebugEntry(enc, entry); + } + m_debug_entries.clear(); + } +#endif +} + +void GSDeviceMTL::EndDebugGroup(id enc) +{ +#if MTL_ENABLE_DEBUG + if (!m_debug_entries.empty() && m_debug_group_level) + { + auto begin = m_debug_entries.begin(); + auto cur = begin; + auto end = m_debug_entries.end(); + while (cur != end && m_debug_group_level) + { + ProcessDebugEntry(enc, *cur); + cur++; + } + m_debug_entries.erase(begin, cur); + } +#endif +} + +static simd::float2 ToSimd(const ImVec2& vec) +{ + return simd::make_float2(vec.x, vec.y); +} + +static simd::float4 ToSimd(const ImVec4& vec) +{ + return simd::make_float4(vec.x, vec.y, vec.z, vec.w); +} + +void GSDeviceMTL::RenderImGui(ImDrawData* data) +{ + if (data->CmdListsCount == 0) + return; + simd::float4 transform; + transform.xy = 2.f / simd::make_float2(data->DisplaySize.x, -data->DisplaySize.y); + transform.zw = ToSimd(data->DisplayPos) * -transform.xy + simd::make_float2(-1, 1); + id enc = m_current_render.encoder; + [enc pushDebugGroup:@"ImGui"]; + + Map map = Allocate(m_vertex_upload_buf, data->TotalVtxCount * sizeof(ImDrawVert) + data->TotalIdxCount * sizeof(ImDrawIdx)); + size_t vtx_off = 0; + 
size_t idx_off = data->TotalVtxCount * sizeof(ImDrawVert); + + [enc setRenderPipelineState:m_imgui_pipeline]; + [enc setVertexBuffer:map.gpu_buffer offset:map.gpu_offset atIndex:GSMTLBufferIndexVertices]; + [enc setVertexBytes:&transform length:sizeof(transform) atIndex:GSMTLBufferIndexUniforms]; + + simd::uint4 last_scissor = simd::make_uint4(0, 0, m_display->GetWindowWidth(), m_display->GetWindowHeight()); + simd::float2 fb_size = simd::float2(last_scissor.zw); + simd::float2 clip_off = ToSimd(data->DisplayPos); // (0,0) unless using multi-viewports + simd::float2 clip_scale = ToSimd(data->FramebufferScale); // (1,1) unless using retina display which are often (2,2) + ImTextureID last_tex = nullptr; + bool last_tex_a8 = false; + + for (int i = 0; i < data->CmdListsCount; i++) + { + const ImDrawList* cmd_list = data->CmdLists[i]; + size_t vtx_size = cmd_list->VtxBuffer.Size * sizeof(ImDrawVert); + size_t idx_size = cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx); + memcpy(static_cast(map.cpu_buffer) + vtx_off, cmd_list->VtxBuffer.Data, vtx_size); + memcpy(static_cast(map.cpu_buffer) + idx_off, cmd_list->IdxBuffer.Data, idx_size); + + for (const ImDrawCmd& cmd : cmd_list->CmdBuffer) + { + if (cmd.UserCallback) + [NSException raise:@"Unimplemented" format:@"UserCallback not implemented"]; + + simd::float4 clip_rect = (ToSimd(cmd.ClipRect) - clip_off.xyxy) * clip_scale.xyxy; + simd::float2 clip_min = clip_rect.xy; + simd::float2 clip_max = clip_rect.zw; + clip_min = simd::max(clip_min, simd::float2(0)); + clip_max = simd::min(clip_max, fb_size); + if (simd::any(clip_min >= clip_max)) + continue; + simd::uint4 scissor = simd::make_uint4(simd::uint2(clip_min), simd::uint2(clip_max - clip_min)); + ImTextureID tex = cmd.GetTexID(); + if (simd::any(scissor != last_scissor)) + { + last_scissor = scissor; + [enc setScissorRect:(MTLScissorRect){ .x = scissor.x, .y = scissor.y, .width = scissor.z, .height = scissor.w }]; + } + if (tex != last_tex) + { + last_tex = tex; + [enc 
setFragmentTexture:(__bridge id)tex atIndex:0]; + if (!m_dev.features.texture_swizzle) + { + bool a8 = [(__bridge id)tex pixelFormat] == MTLPixelFormatA8Unorm; + if (last_tex_a8 != a8) + { + [enc setRenderPipelineState:a8 ? m_imgui_pipeline_a8 : m_imgui_pipeline]; + last_tex_a8 = a8; + } + } + } + + [enc setVertexBufferOffset:map.gpu_offset + vtx_off + cmd.VtxOffset * sizeof(ImDrawVert) atIndex:0]; + [enc drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:cmd.ElemCount + indexType:sizeof(ImDrawIdx) == 2 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 + indexBuffer:map.gpu_buffer + indexBufferOffset:map.gpu_offset + idx_off + cmd.IdxOffset * sizeof(ImDrawIdx)]; + } + + vtx_off += vtx_size; + idx_off += idx_size; + } + + [enc popDebugGroup]; +} + +#endif // __APPLE__ diff --git a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h new file mode 100644 index 0000000000..c0f8c1740a --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.h @@ -0,0 +1,66 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +#ifndef __OBJC__ + #error "This header is for use with Objective-C++ only. 
+#endif + +#ifdef __APPLE__ + +#include "PCSX2Base.h" +#include "common/MRCHelpers.h" +#include + +struct GSMTLDevice +{ + enum class MetalVersion : u8 + { + Metal20, ///< Metal 2.0 (macOS 10.13, iOS 11) + Metal21, ///< Metal 2.1 (macOS 10.14, iOS 12) + Metal22, ///< Metal 2.2 (macOS 10.15, iOS 13) + Metal23, ///< Metal 2.3 (macOS 11, iOS 14) + }; + + struct Features + { + bool unified_memory; + bool texture_swizzle; + bool framebuffer_fetch; + bool primid; + bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD + MetalVersion shader_version; + int max_texsize; + }; + + MRCOwned> dev; + MRCOwned> shaders; + Features features; + + GSMTLDevice() = default; + explicit GSMTLDevice(MRCOwned> dev); + + bool IsOk() const { return dev && shaders; } + void Reset() + { + dev = nullptr; + shaders = nullptr; + } +}; + +const char* to_string(GSMTLDevice::MetalVersion ver); + +#endif // __APPLE__ diff --git a/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm new file mode 100644 index 0000000000..9a8eafef7c --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSMTLDeviceInfo.mm @@ -0,0 +1,214 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2022 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#include "GSMTLDeviceInfo.h" +#include "GS/GS.h" +#include "common/Console.h" + +#ifdef __APPLE__ + +static id loadMainLibrary(id dev, NSString* name) +{ + NSString* path = [[NSBundle mainBundle] pathForResource:name ofType:@"metallib"]; + return path ? [dev newLibraryWithFile:path error:nullptr] : nullptr; +} + +static MRCOwned> loadMainLibrary(id dev) +{ + if (@available(macOS 11.0, iOS 14.0, *)) + if (id lib = loadMainLibrary(dev, @"Metal23")) + return MRCTransfer(lib); + if (@available(macOS 10.15, iOS 13.0, *)) + if (id lib = loadMainLibrary(dev, @"Metal22")) + return MRCTransfer(lib); + if (@available(macOS 10.14, iOS 12.0, *)) + if (id lib = loadMainLibrary(dev, @"Metal21")) + return MRCTransfer(lib); + return MRCTransfer([dev newDefaultLibrary]); +} + +static GSMTLDevice::MetalVersion detectLibraryVersion(id lib) +{ + // These functions are defined in tfx.metal to indicate the metal version used to make the metallib + if (MRCTransfer([lib newFunctionWithName:@"metal_version_23"])) + return GSMTLDevice::MetalVersion::Metal23; + if (MRCTransfer([lib newFunctionWithName:@"metal_version_22"])) + return GSMTLDevice::MetalVersion::Metal22; + if (MRCTransfer([lib newFunctionWithName:@"metal_version_21"])) + return GSMTLDevice::MetalVersion::Metal21; + return GSMTLDevice::MetalVersion::Metal20; +} + +static bool detectPrimIDSupport(id dev, id lib) +{ + // Nvidia Metal driver is missing primid support, yay + MRCOwned desc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [desc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])]; + [desc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"primid_test"])]; + [[desc colorAttachments][0] setPixelFormat:MTLPixelFormatR8Uint]; + NSError* err = nil; // must start nil: the result below is read even when pipeline creation succeeds and never assigns it + [[dev newRenderPipelineStateWithDescriptor:desc error:&err] release]; + return !err; +} + +namespace +{ + enum class DetectionResult + { + HaswellOrNotIntel, ///< Everything works fine + Broadwell, ///< PrimID broken + Skylake, ///< PrimID 
broken, FBFetch supported + }; +} + +static DetectionResult detectIntelGPU(id dev, id lib) +{ + // Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch! + // Annoyingly, the Haswell compiler successfully makes a pipeline but actually miscompiles it and doesn't insert any fbfetch instructions + // The Broadwell compiler inserts the Skylake fbfetch instruction, but Broadwell doesn't support that. It seems to make the shader not do anything + // So we actually have to test the thing + // In addition, Broadwell+ has broken primid so we need to disable that. + // Conveniently we can use the same test to detect both (except on macOS < 11. All Broadwell machines support 11, so the answer to that is "upgrade") + // See https://github.com/tellowkrinkle/MetalBugReproduction/releases/tag/BrokenPrimID for details + + // AMD compiler crashes and gets retried 3 times over multiple seconds trying to compile the pipeline + // We know this is only a possibility on Intel anyways + if (![[dev name] containsString:@"Intel"]) + return DetectionResult::HaswellOrNotIntel; + auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [pdesc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])]; + [pdesc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"fbfetch_test"])]; + [[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm]; + auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]); + if (!pipe) + return DetectionResult::HaswellOrNotIntel; + auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]); + auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm width:1 height:1 mipmapped:false]; + [tdesc setUsage:MTLTextureUsageRenderTarget]; + auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]); + auto q = MRCTransfer([dev newCommandQueue]); + u32 px = 0x11223344; + memcpy([buf contents], &px, 4); + id cmdbuf 
= [q commandBuffer]; + id upload = [cmdbuf blitCommandEncoder]; + [upload copyFromBuffer:buf sourceOffset:0 sourceBytesPerRow:4 sourceBytesPerImage:4 sourceSize:MTLSizeMake(1, 1, 1) toTexture:tex destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; + [upload endEncoding]; + auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); + [[rpdesc colorAttachments][0] setTexture:tex]; + [[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad]; + [[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore]; + id renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc]; + [renc setRenderPipelineState:pipe]; + [renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [renc endEncoding]; + id download = [cmdbuf blitCommandEncoder]; + [download copyFromTexture:tex sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(1, 1, 1) toBuffer:buf destinationOffset:0 destinationBytesPerRow:4 destinationBytesPerImage:4]; + [download endEncoding]; + [cmdbuf commit]; + [cmdbuf waitUntilCompleted]; + u32 outpx; + memcpy(&outpx, [buf contents], 4); + // Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing + if (outpx == 0x22446688) + return DetectionResult::Skylake; + else if (outpx == 0x11223344) + return DetectionResult::Broadwell; + else + return DetectionResult::HaswellOrNotIntel; +} + +GSMTLDevice::GSMTLDevice(MRCOwned> dev) +{ + memset(&features, 0, sizeof(features)); // zero before the null-device early return so features is never left indeterminate + + if (!dev) + return; + shaders = loadMainLibrary(dev); + + if (char* env = getenv("MTL_UNIFIED_MEMORY")) + features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; + else if (@available(macOS 10.15, iOS 13.0, *)) + features.unified_memory = [dev hasUnifiedMemory]; + else + features.unified_memory = false; + + if (@available(macOS 10.15, iOS 13.0, *)) + if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1]) + features.texture_swizzle = 
true; + + if (@available(macOS 11.0, iOS 13.0, *)) + if ([dev supportsFamily:MTLGPUFamilyApple1]) + features.framebuffer_fetch = true; + + features.shader_version = detectLibraryVersion(shaders); + if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23) + { + Console.Warning("Metal: GPU supports framebuffer fetch but shader lib does not! Get an updated shader lib for better performance!"); + features.framebuffer_fetch = false; + } + + features.primid = features.shader_version >= MetalVersion::Metal22; + if (features.primid && !detectPrimIDSupport(dev, shaders)) + features.primid = false; + + if (!features.framebuffer_fetch && features.shader_version >= MetalVersion::Metal23) + { + switch (detectIntelGPU(dev, shaders)) + { + case DetectionResult::HaswellOrNotIntel: + break; + case DetectionResult::Broadwell: + features.primid = false; // Broken + break; + case DetectionResult::Skylake: + features.primid = false; // Broken + features.framebuffer_fetch = true; + break; + } + } + + if (features.framebuffer_fetch && GSConfig.DisableFramebufferFetch) + { + Console.Warning("Framebuffer fetch was found but is disabled. 
This will reduce performance."); + features.framebuffer_fetch = false; + } + + if (char* env = getenv("MTL_SLOW_COLOR_COMPRESSION")) + features.slow_color_compression = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; + else + features.slow_color_compression = [[dev name] containsString:@"AMD"]; + + features.max_texsize = 8192; + if ([dev supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) + features.max_texsize = 16384; + if (@available(macOS 10.15, iOS 13.0, *)) + if ([dev supportsFamily:MTLGPUFamilyApple3]) + features.max_texsize = 16384; + + this->dev = std::move(dev); +} + +const char* to_string(GSMTLDevice::MetalVersion ver) +{ + switch (ver) + { + case GSMTLDevice::MetalVersion::Metal20: return "Metal 2.0"; + case GSMTLDevice::MetalVersion::Metal21: return "Metal 2.1"; + case GSMTLDevice::MetalVersion::Metal22: return "Metal 2.2"; + case GSMTLDevice::MetalVersion::Metal23: return "Metal 2.3"; + } +} + +#endif // __APPLE__ diff --git a/pcsx2/GS/Renderers/Metal/GSMTLShaderCommon.h b/pcsx2/GS/Renderers/Metal/GSMTLShaderCommon.h new file mode 100644 index 0000000000..96877b70f5 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSMTLShaderCommon.h @@ -0,0 +1,60 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#pragma once +#include +#include "GSMTLSharedHeader.h" + +using namespace metal; + +constant uchar2 SCALING_FACTOR [[function_constant(GSMTLConstantIndex_SCALING_FACTOR)]]; + +struct ConvertShaderData +{ + float4 p [[position]]; + float2 t; +}; + +struct ConvertPSRes +{ + texture2d texture [[texture(GSMTLTextureIndexNonHW)]]; + sampler s [[sampler(0)]]; + float4 sample(float2 coord) + { + return texture.sample(s, coord); + } +}; + +struct ConvertPSDepthRes +{ + depth2d texture [[texture(GSMTLTextureIndexNonHW)]]; + sampler s [[sampler(0)]]; + float sample(float2 coord) + { + return texture.sample(s, coord); + } +}; + +static inline float4 convert_depth32_rgba8(float value) +{ + uint val = uint(value * 0x1p32); + return float4(as_type(val)); +} + +static inline float4 convert_depth16_rgba8(float value) +{ + uint val = uint(value * 0x1p32); + return float4(uint4(val << 3, val >> 2, val >> 7, val >> 8) & uint4(0xf8, 0xf8, 0xf8, 0x80)); +} diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h new file mode 100644 index 0000000000..4f8718ab7a --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -0,0 +1,151 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#pragma once +#include + +enum GSMTLBufferIndices +{ + GSMTLBufferIndexVertices, + GSMTLBufferIndexUniforms, + GSMTLBufferIndexHWVertices, + GSMTLBufferIndexHWUniforms, +}; + +enum GSMTLTextureIndex +{ + GSMTLTextureIndexNonHW, + GSMTLTextureIndexTex, + GSMTLTextureIndexPalette, + GSMTLTextureIndexRenderTarget, + GSMTLTextureIndexPrimIDs, +}; + +struct GSMTLConvertPSUniform +{ + int emoda; + int emodc; +}; + +struct GSMTLInterlacePSUniform +{ + vector_float2 ZrH; + float hH; +}; + +struct GSMTLMainVSUniform +{ + vector_float2 vertex_scale; + vector_float2 vertex_offset; + vector_float2 texture_scale; + vector_float2 texture_offset; + vector_float2 point_size; + uint max_depth; +}; + +struct GSMTLMainPSUniform +{ + union + { + vector_float4 fog_color_aref; + vector_float3 fog_color; + struct + { + float pad0[3]; + float aref; + }; + }; + vector_float4 wh; ///< xy => PS2, zw => actual (upscaled) + vector_float2 ta; + float max_depth; + float alpha_fix; + vector_uint4 uv_msk_fix; + vector_uint4 fbmask; + + vector_float4 half_texel; + vector_float4 uv_min_max; + struct + { + unsigned int blue_mask; + unsigned int blue_shift; + unsigned int green_mask; + unsigned int green_shift; + } channel_shuffle; + vector_float2 tc_offset; + vector_float2 st_scale; + matrix_float4x4 dither_matrix; +}; + +enum GSMTLAttributes +{ + GSMTLAttributeIndexST, + GSMTLAttributeIndexC, + GSMTLAttributeIndexQ, + GSMTLAttributeIndexXY, + GSMTLAttributeIndexZ, + GSMTLAttributeIndexUV, + GSMTLAttributeIndexF, +}; + +enum GSMTLFnConstants +{ + GSMTLConstantIndex_SCALING_FACTOR, + GSMTLConstantIndex_FRAMEBUFFER_FETCH, + GSMTLConstantIndex_FST, + GSMTLConstantIndex_IIP, + GSMTLConstantIndex_VS_POINT_SIZE, + GSMTLConstantIndex_PS_AEM_FMT, + GSMTLConstantIndex_PS_PAL_FMT, + GSMTLConstantIndex_PS_DFMT, + GSMTLConstantIndex_PS_DEPTH_FMT, + GSMTLConstantIndex_PS_AEM, + GSMTLConstantIndex_PS_FBA, + GSMTLConstantIndex_PS_FOG, + GSMTLConstantIndex_PS_DATE, + GSMTLConstantIndex_PS_ATST, + 
GSMTLConstantIndex_PS_TFX, + GSMTLConstantIndex_PS_TCC, + GSMTLConstantIndex_PS_WMS, + GSMTLConstantIndex_PS_WMT, + GSMTLConstantIndex_PS_LTF, + GSMTLConstantIndex_PS_SHUFFLE, + GSMTLConstantIndex_PS_READ_BA, + GSMTLConstantIndex_PS_WRITE_RG, + GSMTLConstantIndex_PS_FBMASK, + GSMTLConstantIndex_PS_BLEND_A, + GSMTLConstantIndex_PS_BLEND_B, + GSMTLConstantIndex_PS_BLEND_C, + GSMTLConstantIndex_PS_BLEND_D, + GSMTLConstantIndex_PS_CLR_HW, + GSMTLConstantIndex_PS_HDR, + GSMTLConstantIndex_PS_COLCLIP, + GSMTLConstantIndex_PS_BLEND_MIX, + GSMTLConstantIndex_PS_PABE, + GSMTLConstantIndex_PS_NO_COLOR, + GSMTLConstantIndex_PS_NO_COLOR1, + GSMTLConstantIndex_PS_ONLY_ALPHA, + GSMTLConstantIndex_PS_CHANNEL, + GSMTLConstantIndex_PS_DITHER, + GSMTLConstantIndex_PS_ZCLAMP, + GSMTLConstantIndex_PS_TCOFFSETHACK, + GSMTLConstantIndex_PS_URBAN_CHAOS_HLE, + GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE, + GSMTLConstantIndex_PS_TEX_IS_FB, + GSMTLConstantIndex_PS_AUTOMATIC_LOD, + GSMTLConstantIndex_PS_MANUAL_LOD, + GSMTLConstantIndex_PS_POINT_SAMPLER, + GSMTLConstantIndex_PS_INVALID_TEX0, + GSMTLConstantIndex_PS_SCANMSK, +}; diff --git a/pcsx2/GS/Renderers/Metal/GSMetalCPPAccessible.h b/pcsx2/GS/Renderers/Metal/GSMetalCPPAccessible.h new file mode 100644 index 0000000000..cd4fae8120 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSMetalCPPAccessible.h @@ -0,0 +1,28 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once +// Header with all metal stuff available for use with C++ (rather than Objective-C++) + +#ifdef __APPLE__ + +#include "HostDisplay.h" + +class GSDevice; +GSDevice* MakeGSDeviceMTL(); +HostDisplay* MakeMetalHostDisplay(); +HostDisplay::AdapterAndModeList GetMetalAdapterAndModeList(); + +#endif diff --git a/pcsx2/GS/Renderers/Metal/GSTextureMTL.h b/pcsx2/GS/Renderers/Metal/GSTextureMTL.h new file mode 100644 index 0000000000..11007f99eb --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSTextureMTL.h @@ -0,0 +1,82 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +#include "GS/Renderers/Common/GSTexture.h" + +#ifndef __OBJC__ + #error "This header is for use with Objective-C++ only. 
+#endif + +#ifdef __APPLE__ +#include "common/MRCHelpers.h" +#include + +class GSDeviceMTL; + +class GSTextureMTL : public GSTexture +{ + GSDeviceMTL* m_dev; + MRCOwned> m_texture; + bool m_has_mipmaps = false; + + // In Metal clears happen as a part of render passes instead of as separate steps, but the GSDevice API has it as a separate step + // To deal with that, store the fact that a clear was requested here and it'll be applied on the next render pass + bool m_needs_color_clear = false; + bool m_needs_depth_clear = false; + bool m_needs_stencil_clear = false; + GSVector4 m_clear_color; + float m_clear_depth; + int m_clear_stencil; + +public: + u64 m_last_read = 0; ///< Last time this texture was read by a draw + u64 m_last_write = 0; ///< Last time this texture was written by a draw + GSTextureMTL(GSDeviceMTL* dev, MRCOwned> texture, Type type, Format format); + ~GSTextureMTL(); + + /// For making fake backbuffers + void SetSize(GSVector2i size) { m_size = size; } + + /// Requests the texture be cleared the next time a color render is done + void RequestColorClear(GSVector4 color); + /// Requests the texture be cleared the next time a depth render is done + void RequestDepthClear(float depth); + /// Requests the texture be cleared the next time a stencil render is done + void RequestStencilClear(int stencil); + /// Reads whether a color clear was requested, then clears the request + bool GetResetNeedsColorClear(GSVector4& colorOut); + /// Reads whether a depth clear was requested, then clears the request + bool GetResetNeedsDepthClear(float& depthOut); + /// Reads whether a stencil clear was requested, then clears the request + bool GetResetNeedsStencilClear(int& stencilOut); + /// Flushes requested clears to the texture + void FlushClears(); + /// Marks pending clears as done (e.g. 
if the whole texture is about to be overwritten) + void InvalidateClears(); + + void* GetNativeHandle() const override; + bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) override; + bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) override; + void* MapWithPitch(const GSVector4i& r, int pitch, int layer); + void Unmap() override; + void GenerateMipmap() override; + bool Save(const std::string& fn) override; + void Swap(GSTexture* tex) override; + id GetTexture() { return m_texture; } +}; + +#endif diff --git a/pcsx2/GS/Renderers/Metal/GSTextureMTL.mm b/pcsx2/GS/Renderers/Metal/GSTextureMTL.mm new file mode 100644 index 0000000000..a016b6da98 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/GSTextureMTL.mm @@ -0,0 +1,216 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#include "PrecompiledHeader.h" +#include "GSTextureMTL.h" +#include "GSDeviceMTL.h" +#include "GS/GSPerfMon.h" + +#ifdef __APPLE__ + +GSTextureMTL::GSTextureMTL(GSDeviceMTL* dev, MRCOwned> texture, Type type, Format format) + : m_dev(dev) + , m_texture(std::move(texture)) +{ + m_type = type; + m_format = format; + m_size.x = [m_texture width]; + m_size.y = [m_texture height]; + m_mipmap_levels = [m_texture mipmapLevelCount]; +} +GSTextureMTL::~GSTextureMTL() +{ +} + +void GSTextureMTL::RequestColorClear(GSVector4 color) +{ + m_needs_color_clear = true; + m_clear_color = color; +} +void GSTextureMTL::RequestDepthClear(float depth) +{ + m_needs_depth_clear = true; + m_clear_depth = depth; +} +void GSTextureMTL::RequestStencilClear(int stencil) +{ + m_needs_stencil_clear = true; + m_clear_stencil = stencil; +} +bool GSTextureMTL::GetResetNeedsColorClear(GSVector4& colorOut) +{ + if (m_needs_color_clear) + { + m_needs_color_clear = false; + colorOut = m_clear_color; + return true; + } + return false; +} +bool GSTextureMTL::GetResetNeedsDepthClear(float& depthOut) +{ + if (m_needs_depth_clear) + { + m_needs_depth_clear = false; + depthOut = m_clear_depth; + return true; + } + return false; +} +bool GSTextureMTL::GetResetNeedsStencilClear(int& stencilOut) +{ + if (m_needs_stencil_clear) + { + m_needs_stencil_clear = false; + stencilOut = m_clear_stencil; + return true; + } + return false; +} + +void GSTextureMTL::FlushClears() +{ + if (!m_needs_color_clear && !m_needs_depth_clear && !m_needs_stencil_clear) + return; + + m_dev->BeginRenderPass(@"Clear", + m_needs_color_clear ? this : nullptr, MTLLoadActionLoad, + m_needs_depth_clear ? this : nullptr, MTLLoadActionLoad, + m_needs_stencil_clear ? 
this : nullptr, MTLLoadActionLoad); +} + +void* GSTextureMTL::GetNativeHandle() const +{ + return (__bridge void*)m_texture; +} + +void GSTextureMTL::InvalidateClears() +{ + m_needs_color_clear = false; + m_needs_depth_clear = false; + m_needs_stencil_clear = false; +} + +bool GSTextureMTL::Update(const GSVector4i& r, const void* data, int pitch, int layer) +{ + if (void* buffer = MapWithPitch(r, pitch, layer)) + { + memcpy(buffer, data, CalcUploadSize(r.height(), pitch)); + return true; + } + return false; +} + +bool GSTextureMTL::Map(GSMap& m, const GSVector4i* _r, int layer) +{ + GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y); + u32 block_size = GetCompressedBlockSize(); + u32 blocks_wide = (r.width() + block_size - 1) / block_size; + m.pitch = blocks_wide * GetCompressedBytesPerBlock(); + if (void* buffer = MapWithPitch(r, m.pitch, layer)) + { + m.bits = static_cast(buffer); + return true; + } + return false; +} + +void* GSTextureMTL::MapWithPitch(const GSVector4i& r, int pitch, int layer) +{ @autoreleasepool { + if (layer >= m_mipmap_levels) + return nullptr; + m_has_mipmaps = false; + + size_t size = CalcUploadSize(r.height(), pitch); + GSDeviceMTL::Map map; + + bool needs_clear = false; + if (m_needs_color_clear) + { + m_needs_color_clear = false; + // Not uploading to full texture + needs_clear = r.left > 0 || r.top > 0 || r.right < m_size.x || r.bottom < m_size.y; + } + + id enc; + if (m_last_read == m_dev->m_current_draw || needs_clear) + { + if (needs_clear) + { + m_needs_color_clear = true; + m_dev->BeginRenderPass(@"Pre-Upload Clear", this, MTLLoadActionLoad, nullptr, MTLLoadActionDontCare); + } + enc = m_dev->GetLateTextureUploadEncoder(); + map = m_dev->Allocate(m_dev->m_vertex_upload_buf, size); + } + else + { + enc = m_dev->GetTextureUploadEncoder(); + map = m_dev->Allocate(m_dev->m_texture_upload_buf, size); + } + // Copy is scheduled now, won't happen until the encoder is committed so no problems with ordering + [enc 
copyFromBuffer:map.gpu_buffer + sourceOffset:map.gpu_offset + sourceBytesPerRow:pitch + sourceBytesPerImage:size + sourceSize:MTLSizeMake(r.width(), r.height(), 1) + toTexture:m_texture + destinationSlice:0 + destinationLevel:layer + destinationOrigin:MTLOriginMake(r.x, r.y, 0)]; + + g_perfmon.Put(GSPerfMon::TextureUploads, 1); + return map.cpu_buffer; +}} + +void GSTextureMTL::Unmap() +{ + // Nothing to do here, upload is already scheduled +} + +void GSTextureMTL::GenerateMipmap() +{ @autoreleasepool { + if (m_mipmap_levels > 1 && !m_has_mipmaps) + { + id enc = m_dev->GetTextureUploadEncoder(); + [enc generateMipmapsForTexture:m_texture]; + } +}} + +bool GSTextureMTL::Save(const std::string& fn) +{ + // TODO: Implement + return false; +} + +void GSTextureMTL::Swap(GSTexture* other) +{ + GSTexture::Swap(other); + + GSTextureMTL* mtex = static_cast(other); + pxAssert(m_dev == mtex->m_dev); +#define SWAP(x) std::swap(x, mtex->x) + SWAP(m_texture); + SWAP(m_has_mipmaps); + SWAP(m_needs_color_clear); + SWAP(m_needs_depth_clear); + SWAP(m_needs_stencil_clear); + SWAP(m_clear_color); + SWAP(m_clear_depth); + SWAP(m_clear_stencil); +#undef SWAP +} + +#endif diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal new file mode 100644 index 0000000000..f704f60ec2 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/convert.metal @@ -0,0 +1,378 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "GSMTLShaderCommon.h" + +using namespace metal; + +struct ConvertVSIn +{ + vector_float2 position [[attribute(0)]]; + vector_float2 texcoord0 [[attribute(1)]]; +}; + +struct ImGuiVSIn +{ + vector_float2 position [[attribute(0)]]; + vector_float2 texcoord0 [[attribute(1)]]; + vector_half4 color [[attribute(2)]]; +}; + +struct ImGuiShaderData +{ + float4 p [[position]]; + float2 t; + half4 c; +}; + +template +struct DirectReadTextureIn +{ + texture2d tex [[texture(GSMTLTextureIndexNonHW)]]; + vec read(float4 pos) + { + return tex.read(uint2(pos.xy)); + } +}; + +vertex ConvertShaderData fs_triangle(uint vid [[vertex_id]]) +{ + ConvertShaderData out; + out.p = float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1); + out.t = float2(vid & 1 ? 2 : 0, vid & 2 ? -1 : 1); + return out; +} + +vertex ConvertShaderData vs_convert(ConvertVSIn in [[stage_in]]) +{ + ConvertShaderData out; + out.p = float4(in.position, 0, 1); + out.t = in.texcoord0; + return out; +} + +vertex ImGuiShaderData vs_imgui(ImGuiVSIn in [[stage_in]], constant float4& cb [[buffer(GSMTLBufferIndexUniforms)]]) +{ + ImGuiShaderData out; + out.p = float4(in.position * cb.xy + cb.zw, 0, 1); + out.t = in.texcoord0; + out.c = in.color; + return out; +} + +float4 ps_crt(float4 color, int i) +{ + constexpr float4 mask[4] = + { + float4(1, 0, 0, 0), + float4(0, 1, 0, 0), + float4(0, 0, 1, 0), + float4(1, 1, 1, 0), + }; + + return color * saturate(mask[i] + 0.5f); +} + +float4 ps_scanlines(float4 color, int i) +{ + constexpr float4 mask[2] = + { + float4(1, 1, 1, 0), + float4(0, 0, 0, 0) + }; + + return color * saturate(mask[i] + 0.5f); +} + +fragment float4 ps_copy(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + return res.sample(data.t); +} + +fragment ushort ps_convert_rgba8_16bits(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + float4 c = res.sample(data.t); + uint4 cu 
= uint4(c * 255.f + 0.5f); + return (cu.x >> 3) | ((cu.y << 2) & 0x03e0) | ((cu.z << 7) & 0x7c00) | ((cu.w << 8) & 0x8000); +} + +fragment float4 ps_copy_fs(float4 p [[position]], DirectReadTextureIn tex) +{ + return tex.read(p); +} + +fragment void ps_datm1(float4 p [[position]], DirectReadTextureIn tex) +{ + if (tex.read(p).a < (127.5f / 255.f)) + discard_fragment(); +} + +fragment void ps_datm0(float4 p [[position]], DirectReadTextureIn tex) +{ + if (tex.read(p).a > (127.5f / 255.f)) + discard_fragment(); +} + +fragment float4 ps_primid_init_datm0(float4 p [[position]], DirectReadTextureIn tex) +{ + return tex.read(p).a > (127.5f / 255.f) ? -1 : FLT_MAX; +} + +fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn tex) +{ + return tex.read(p).a < (127.5f / 255.f) ? -1 : FLT_MAX; +} + +fragment float4 ps_mod256(float4 p [[position]], DirectReadTextureIn tex) +{ + float4 c = round(tex.read(p) * 255.f); + return (c - 256.f * floor(c / 256.f)) / 255.f; +} + +fragment float4 ps_filter_scanlines(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + return ps_scanlines(res.sample(data.t), uint(data.p.y) % 2); +} + +fragment float4 ps_filter_diagonal(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + uint4 p = uint4(data.p); + return ps_crt(res.sample(data.t), (p.x + (p.y % 3)) % 3); +} + +fragment float4 ps_filter_transparency(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + float4 c = res.sample(data.t); + c.a = dot(c.rgb, float3(0.299f, 0.587f, 0.114f)); + return c; +} + +fragment float4 ps_filter_triangular(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + uint4 p = uint4(data.p); + uint val = ((p.x + ((p.y >> 1) & 1) * 3) >> 1) % 3; + return ps_crt(res.sample(data.t), val); +} + +fragment float4 ps_filter_complex(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + float2 texdim = float2(res.texture.get_width(), res.texture.get_height()); + + if (dfdy(data.t.y) * texdim.y > 0.5) + { + return 
res.sample(data.t); + } + else + { + float factor = (0.9f - 0.4f * cos(2.f * M_PI_F * data.t.y * texdim.y)); + float ycoord = (floor(data.t.y * texdim.y) + 0.5f) / texdim.y; + return factor * res.sample(float2(data.t.x, ycoord)); + } +} + +fragment uint ps_convert_float32_32bits(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res) +{ + return uint(0x1p32 * res.sample(data.t)); +} + +fragment float4 ps_convert_float32_rgba8(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res) +{ + return convert_depth32_rgba8(res.sample(data.t)) / 255.f; +} + +fragment float4 ps_convert_float16_rgb5a1(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res) +{ + return convert_depth16_rgba8(res.sample(data.t)) / 255.f; +} + +struct DepthOut +{ + float depth [[depth(any)]]; + DepthOut(float depth): depth(depth) {} +}; + +fragment DepthOut ps_depth_copy(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res) +{ + return res.sample(data.t); +} + +static float pack_rgba8_depth(float4 unorm) +{ + return float(as_type(uchar4(unorm * 255.f + 0.5f))) * 0x1p-32f; +} + +fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + return pack_rgba8_depth(res.sample(data.t)); +} + +fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + // Same as above but without the alpha channel (24 bits Z) + return pack_rgba8_depth(float4(res.sample(data.t).rgb, 0)); +} + +fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + return float(as_type(uchar2(res.sample(data.t).rg * 255.f + 0.5f))) * 0x1p-32; +} + +fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + uint4 cu = uint4(res.sample(data.t) * 255.f + 0.5f); + uint out = (cu.x >> 3) | ((cu.y << 2) & 0x03e0) | ((cu.z << 7) & 0x7c00) | ((cu.w << 8) & 0x8000); + return float(out) * 0x1p-32; +} + +fragment float4 ps_convert_rgba_8i(ConvertShaderData 
data [[stage_in]], ConvertPSRes res, + constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]]) +{ + // Convert a RGBA texture into a 8 bits packed texture + // Input column: 8x2 RGBA pixels + // 0: 8 RGBA + // 1: 8 RGBA + // Output column: 16x4 Index pixels + // 0: 8 R | 8 B + // 1: 8 R | 8 B + // 2: 8 G | 8 A + // 3: 8 G | 8 A + float c; + + uint2 sel = uint2(data.p.xy) % uint2(16, 16); + uint2 tb = (uint2(data.p.xy) & ~uint2(15, 3)) >> 1; + + uint ty = tb.y | (uint(data.p.y) & 1); + uint txN = tb.x | (uint(data.p.x) & 7); + uint txH = tb.x | ((uint(data.p.x) + 4) & 7); + + txN *= SCALING_FACTOR.x; + txH *= SCALING_FACTOR.x; + ty *= SCALING_FACTOR.y; + + // TODO investigate texture gather + float4 cN = res.texture.read(uint2(txN, ty)); + float4 cH = res.texture.read(uint2(txH, ty)); + + if ((sel.y & 4) == 0) + { + // Column 0 and 2 + if ((sel.y & 2) == 0) + { + if ((sel.x & 8) == 0) + c = cN.r; + else + c = cN.b; + } + else + { + if ((sel.x & 8) == 0) + c = cH.g; + else + c = cH.a; + } + } + else + { + // Column 1 and 3 + if ((sel.y & 2) == 0) + { + if ((sel.x & 8) == 0) + c = cH.r; + else + c = cH.b; + } + else + { + if ((sel.x & 8) == 0) + c = cN.g; + else + c = cN.a; + } + } + return float4(c); +} + +fragment float4 ps_yuv(ConvertShaderData data [[stage_in]], ConvertPSRes res, + constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]]) +{ + float4 i = res.sample(data.t); + float4 o; + + // Value from GS manual + const float3x3 rgb2yuv = + { + {0.587, -0.311, -0.419}, + {0.114, 0.500, -0.081}, + {0.299, -0.169, 0.500} + }; + + float3 yuv = rgb2yuv * i.gbr; + + float Y = 0xDB / 255.f * yuv.x + 0x10 / 255.f; + float Cr = 0xE0 / 255.f * yuv.y + 0x80 / 255.f; + float Cb = 0xE0 / 255.f * yuv.z + 0x80 / 255.f; + + switch (uniform.emoda) + { + case 0: o.a = i.a; break; + case 1: o.a = Y; break; + case 2: o.a = Y/2; break; + case 3: o.a = 0; break; + } + + switch (uniform.emodc) + { + case 0: o.rgb = i.rgb; break; + case 1: 
o.rgb = float3(Y); break; + case 2: o.rgb = float3(Y, Cb, Cr); break; + case 3: o.rgb = float3(i.a); break; + } + + return o; +} + +fragment half4 ps_imgui(ImGuiShaderData data [[stage_in]], texture2d texture [[texture(GSMTLTextureIndexNonHW)]]) +{ + constexpr sampler s(coord::normalized, filter::linear, address::clamp_to_edge); + return data.c * texture.sample(s, data.t); +} + +fragment half4 ps_imgui_a8(ImGuiShaderData data [[stage_in]], texture2d texture [[texture(GSMTLTextureIndexNonHW)]]) +{ + constexpr sampler s(coord::normalized, filter::linear, address::clamp_to_edge); + return data.c * half4(1, 1, 1, texture.sample(s, data.t).a); +} + +fragment float4 ps_shadeboost(float4 p [[position]], DirectReadTextureIn tex, constant float3& cb [[buffer(GSMTLBufferIndexUniforms)]]) +{ + const float brt = cb.x; + const float con = cb.y; + const float sat = cb.z; + // Increase or decrease these values to adjust r, g and b color channels separately + const float AvgLumR = 0.5; + const float AvgLumG = 0.5; + const float AvgLumB = 0.5; + + const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721); + + float3 AvgLumin = float3(AvgLumR, AvgLumG, AvgLumB); + float3 brtColor = tex.read(p).rgb * brt; + float dot_intensity = dot(brtColor, LumCoeff); + float3 intensity = float3(dot_intensity, dot_intensity, dot_intensity); + float3 satColor = mix(intensity, brtColor, sat); + float3 conColor = mix(AvgLumin, satColor, con); + + return float4(conColor, 1); +} diff --git a/pcsx2/GS/Renderers/Metal/fxaa.metal b/pcsx2/GS/Renderers/Metal/fxaa.metal new file mode 100644 index 0000000000..7f73663d33 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/fxaa.metal @@ -0,0 +1,10 @@ +#include "GSMTLShaderCommon.h" +#include "../../../../bin/resources/shaders/common/fxaa.fx" + +fragment float4 ps_fxaa(ConvertShaderData data [[stage_in]], texture2d tex [[texture(GSMTLTextureIndexNonHW)]]) +{ + float4 color = tex.sample(MAIN_SAMPLER, data.t); + color = PreGammaPass(color); + color = FxaaPass(color, data.t, 
tex); + return color; +} diff --git a/pcsx2/GS/Renderers/Metal/interlace.metal b/pcsx2/GS/Renderers/Metal/interlace.metal new file mode 100644 index 0000000000..061e5e5d22 --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/interlace.metal @@ -0,0 +1,49 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "GSMTLShaderCommon.h" + +using namespace metal; + +fragment float4 ps_interlace0(ConvertShaderData data [[stage_in]], ConvertPSRes res, + constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]]) +{ + if (fract(data.t.y * uniform.hH) - 0.5f < 0.f) + discard_fragment(); + return res.sample(data.t); +} + +fragment float4 ps_interlace1(ConvertShaderData data [[stage_in]], ConvertPSRes res, + constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]]) +{ + if (0.5f - fract(data.t.y * uniform.hH) < 0.f) + discard_fragment(); + return res.sample(data.t); +} + +fragment float4 ps_interlace2(ConvertShaderData data [[stage_in]], ConvertPSRes res, + constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]]) +{ + float4 c0 = res.sample(data.t - uniform.ZrH); + float4 c1 = res.sample(data.t); + float4 c2 = res.sample(data.t + uniform.ZrH); + return (c0 + c1 * 2.f + c2) / 4.f; +} + +fragment float4 ps_interlace3(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + return 
res.sample(data.t); +} + diff --git a/pcsx2/GS/Renderers/Metal/merge.metal b/pcsx2/GS/Renderers/Metal/merge.metal new file mode 100644 index 0000000000..f8af9651bc --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/merge.metal @@ -0,0 +1,34 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "GSMTLShaderCommon.h" + +using namespace metal; + +fragment float4 ps_merge0(ConvertShaderData data [[stage_in]], ConvertPSRes res) +{ + float4 c = res.sample(data.t); + c.a *= 2.f; + return c; +} + +fragment float4 ps_merge1(ConvertShaderData data [[stage_in]], ConvertPSRes res, + constant vector_float4& BGColor [[buffer(GSMTLBufferIndexUniforms)]]) +{ + float4 c = res.sample(data.t); + c.a = BGColor.a; + return c; +} + diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal new file mode 100644 index 0000000000..58cc187bfa --- /dev/null +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -0,0 +1,939 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2021 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. 
+ * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "GSMTLShaderCommon.h" + +constant uint FMT_32 = 0; +constant uint FMT_24 = 1; +constant uint FMT_16 = 2; + +constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAMEBUFFER_FETCH)]]; +constant bool FST [[function_constant(GSMTLConstantIndex_FST)]]; +constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]]; +constant bool VS_POINT_SIZE [[function_constant(GSMTLConstantIndex_VS_POINT_SIZE)]]; +constant uint PS_AEM_FMT [[function_constant(GSMTLConstantIndex_PS_AEM_FMT)]]; +constant uint PS_PAL_FMT [[function_constant(GSMTLConstantIndex_PS_PAL_FMT)]]; +constant uint PS_DFMT [[function_constant(GSMTLConstantIndex_PS_DFMT)]]; +constant uint PS_DEPTH_FMT [[function_constant(GSMTLConstantIndex_PS_DEPTH_FMT)]]; +constant bool PS_AEM [[function_constant(GSMTLConstantIndex_PS_AEM)]]; +constant bool PS_FBA [[function_constant(GSMTLConstantIndex_PS_FBA)]]; +constant bool PS_FOG [[function_constant(GSMTLConstantIndex_PS_FOG)]]; +constant uint PS_DATE [[function_constant(GSMTLConstantIndex_PS_DATE)]]; +constant uint PS_ATST [[function_constant(GSMTLConstantIndex_PS_ATST)]]; +constant uint PS_TFX [[function_constant(GSMTLConstantIndex_PS_TFX)]]; +constant bool PS_TCC [[function_constant(GSMTLConstantIndex_PS_TCC)]]; +constant uint PS_WMS [[function_constant(GSMTLConstantIndex_PS_WMS)]]; +constant uint PS_WMT [[function_constant(GSMTLConstantIndex_PS_WMT)]]; +constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]]; +constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]]; +constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]]; +constant bool 
PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]]; +constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]]; +constant uint PS_BLEND_A [[function_constant(GSMTLConstantIndex_PS_BLEND_A)]]; +constant uint PS_BLEND_B [[function_constant(GSMTLConstantIndex_PS_BLEND_B)]]; +constant uint PS_BLEND_C [[function_constant(GSMTLConstantIndex_PS_BLEND_C)]]; +constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BLEND_D)]]; +constant uint PS_CLR_HW [[function_constant(GSMTLConstantIndex_PS_CLR_HW)]]; +constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]]; +constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]]; +constant bool PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]]; +constant bool PS_PABE [[function_constant(GSMTLConstantIndex_PS_PABE)]]; +constant bool PS_NO_COLOR [[function_constant(GSMTLConstantIndex_PS_NO_COLOR)]]; +constant bool PS_NO_COLOR1 [[function_constant(GSMTLConstantIndex_PS_NO_COLOR1)]]; +constant bool PS_ONLY_ALPHA [[function_constant(GSMTLConstantIndex_PS_ONLY_ALPHA)]]; +constant uint PS_CHANNEL [[function_constant(GSMTLConstantIndex_PS_CHANNEL)]]; +constant uint PS_DITHER [[function_constant(GSMTLConstantIndex_PS_DITHER)]]; +constant bool PS_ZCLAMP [[function_constant(GSMTLConstantIndex_PS_ZCLAMP)]]; +constant bool PS_TCOFFSETHACK [[function_constant(GSMTLConstantIndex_PS_TCOFFSETHACK)]]; +constant bool PS_URBAN_CHAOS_HLE [[function_constant(GSMTLConstantIndex_PS_URBAN_CHAOS_HLE)]]; +constant bool PS_TALES_OF_ABYSS_HLE [[function_constant(GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE)]]; +constant bool PS_TEX_IS_FB [[function_constant(GSMTLConstantIndex_PS_TEX_IS_FB)]]; +constant bool PS_AUTOMATIC_LOD [[function_constant(GSMTLConstantIndex_PS_AUTOMATIC_LOD)]]; +constant bool PS_MANUAL_LOD [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]]; +constant bool PS_POINT_SAMPLER [[function_constant(GSMTLConstantIndex_PS_POINT_SAMPLER)]]; +constant bool 
PS_INVALID_TEX0 [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]]; +constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]]; + +#if defined(__METAL_MACOS__) && __METAL_VERSION__ >= 220 + #define PRIMID_SUPPORT 1 +#else + #define PRIMID_SUPPORT 0 +#endif + +#if defined(__METAL_IOS__) || __METAL_VERSION__ >= 230 + #define FBFETCH_SUPPORT 1 +#else + #define FBFETCH_SUPPORT 0 +#endif + +constant bool PS_PRIM_CHECKING_INIT = PS_DATE == 1 || PS_DATE == 2; +constant bool PS_PRIM_CHECKING_READ = PS_DATE == 3; +#if PRIMID_SUPPORT +constant bool NEEDS_PRIMID = PS_PRIM_CHECKING_INIT || PS_PRIM_CHECKING_READ; +#endif +constant bool PS_TEX_IS_DEPTH = PS_URBAN_CHAOS_HLE || PS_TALES_OF_ABYSS_HLE || PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2; +constant bool PS_TEX_IS_COLOR = !PS_TEX_IS_DEPTH; +constant bool PS_HAS_PALETTE = PS_PAL_FMT != 0 || (PS_CHANNEL >= 1 && PS_CHANNEL <= 5); +constant bool NOT_IIP = !IIP; +constant bool SW_BLEND = (PS_BLEND_A != PS_BLEND_B) || PS_BLEND_D; +constant bool SW_AD_TO_HW = PS_BLEND_C == 1 && PS_CLR_HW > 3; +constant bool NEEDS_RT_FOR_BLEND = (((PS_BLEND_A != PS_BLEND_B) && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1)) || PS_BLEND_D == 1 || SW_AD_TO_HW); +constant bool NEEDS_RT_EARLY = PS_TEX_IS_FB || PS_DATE >= 5; +constant bool NEEDS_RT = NEEDS_RT_EARLY || (!PS_PRIM_CHECKING_INIT && (PS_FBMASK || NEEDS_RT_FOR_BLEND)); + +constant bool PS_COLOR0 = !PS_NO_COLOR; +constant bool PS_COLOR1 = !PS_NO_COLOR1; + +struct MainVSIn +{ + float2 st [[attribute(GSMTLAttributeIndexST)]]; + float4 c [[attribute(GSMTLAttributeIndexC)]]; + float q [[attribute(GSMTLAttributeIndexQ)]]; + uint2 p [[attribute(GSMTLAttributeIndexXY)]]; + uint z [[attribute(GSMTLAttributeIndexZ)]]; + uint2 uv [[attribute(GSMTLAttributeIndexUV)]]; + float4 f [[attribute(GSMTLAttributeIndexF)]]; +}; + +struct MainVSOut +{ + float4 p [[position]]; + float4 t; + float4 ti; + float4 c [[function_constant(IIP)]]; + float4 fc [[flat, 
function_constant(NOT_IIP)]]; + float point_size [[point_size, function_constant(VS_POINT_SIZE)]]; +}; + +struct MainPSIn +{ + float4 p [[position]]; + float4 t; + float4 ti; + float4 c [[function_constant(IIP)]]; + float4 fc [[flat, function_constant(NOT_IIP)]]; +}; + +struct MainPSOut +{ + float4 c0 [[color(0), index(0), function_constant(PS_COLOR0)]]; + float4 c1 [[color(0), index(1), function_constant(PS_COLOR1)]]; + float depth [[depth(less), function_constant(PS_ZCLAMP)]]; +}; + +// MARK: - Vertex functions + +static void texture_coord(thread const MainVSIn& v, thread MainVSOut& out, constant GSMTLMainVSUniform& cb) +{ + float2 uv = float2(v.uv) - cb.texture_offset; + float2 st = v.st - cb.texture_offset; + + // Float coordinate + out.t.xy = st; + out.t.w = v.q; + + // Integer coordinate => normalized + out.ti.xy = uv * cb.texture_scale; + + if (FST) + { + // Integer coordinate => integral + out.ti.zw = uv; + } + else + { + // Some games uses float coordinate for post-processing effects + out.ti.zw = st / cb.texture_scale; + } +} + +static MainVSOut vs_main_run(thread const MainVSIn& v, constant GSMTLMainVSUniform& cb) +{ + constexpr float exp_min32 = 0x1p-32; + MainVSOut out; + // Clamp to max depth, gs doesn't wrap + uint z = min(v.z, cb.max_depth); + out.p.xy = float2(v.p) - float2(0.05, 0.05); + out.p.xy = out.p.xy * float2(cb.vertex_scale.x, -cb.vertex_scale.y) - float2(cb.vertex_offset.x, -cb.vertex_offset.y); + out.p.w = 1; + out.p.z = float(z) * exp_min32; + + texture_coord(v, out, cb); + + if (IIP) + out.c = v.c; + else + out.fc = v.c; + + out.t.z = v.f.x; // pack fog with texture + + if (VS_POINT_SIZE) + out.point_size = SCALING_FACTOR.x; + + return out; +} + +vertex MainVSOut vs_main(MainVSIn v [[stage_in]], constant GSMTLMainVSUniform& cb [[buffer(GSMTLBufferIndexHWUniforms)]]) +{ + return vs_main_run(v, cb); +} + +// MARK: - Fragment functions + +constexpr sampler palette_sampler(filter::nearest, address::clamp_to_edge); + +struct PSMain +{ + 
texture2d tex; + depth2d tex_depth; + texture2d palette; + texture2d prim_id_tex; + sampler tex_sampler; + float4 current_color; + uint prim_id; + const thread MainPSIn& in; + constant GSMTLMainPSUniform& cb; + + PSMain(const thread MainPSIn& in, constant GSMTLMainPSUniform& cb): in(in), cb(cb) {} + + template + float4 sample_tex(Args... args) + { + if (PS_TEX_IS_DEPTH) + return float4(tex_depth.sample(args...)); + else + return tex.sample(args...); + } + + float4 sample_c(float2 uv) + { + if (PS_TEX_IS_FB) + return current_color; + + if (PS_POINT_SAMPLER) + { + // Weird issue with ATI/AMD cards, + // it looks like they add 127/128 of a texel to sampling coordinates + // occasionally causing point sampling to erroneously round up. + // I'm manually adjusting coordinates to the centre of texels here, + // though the centre is just paranoia, the top left corner works fine. + // As of 2018 this issue is still present. + uv = (trunc(uv * cb.wh.zw) + 0.5) / cb.wh.zw; + } + uv *= cb.st_scale; + + if (PS_AUTOMATIC_LOD) + { + return sample_tex(tex_sampler, uv); + } + else if (PS_MANUAL_LOD) + { + float K = cb.uv_min_max.x; + float L = cb.uv_min_max.y; + float bias = cb.uv_min_max.z; + float max_lod = cb.uv_min_max.w; + + float gs_lod = K - log2(abs(in.t.w)) * L; + // FIXME max useful ? + //float lod = max(min(gs_lod, max_lod) - bias, 0.f); + float lod = min(gs_lod, max_lod) - bias; + + return sample_tex(tex_sampler, uv, level(lod)); + } + else + { + return sample_tex(tex_sampler, uv, level(0)); + } + } + + float4 sample_p(float idx) + { + return palette.sample(palette_sampler, float2(idx, 0)); + } + + float4 clamp_wrap_uv(float4 uv) + { + float4 uv_out = uv; + float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy; + + if (PS_WMS == PS_WMT) + { + if (PS_WMS == 2) + { + uv_out = clamp(uv, cb.uv_min_max.xyxy, cb.uv_min_max.zwzw); + } + else if (PS_WMS == 3) + { + // wrap negative uv coords to avoid an off by one error that shifted + // textures. 
Fixes Xenosaga's hair issue. + if (!FST) + uv = fract(uv); + + uv_out = float4((ushort4(uv * tex_size) & ushort4(cb.uv_msk_fix.xyxy)) | ushort4(cb.uv_msk_fix.zwzw)) / tex_size; + } + } + else + { + if (PS_WMS == 2) + { + uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz); + } + else if (PS_WMS == 3) + { + if (!FST) + uv.xz = fract(uv.xz); + + uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx; + } + + if (PS_WMT == 2) + { + uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww); + } + else if (PS_WMT == 3) + { + if (!FST) + uv.yw = fract(uv.yw); + + uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy; + } + } + + return uv_out; + } + + float4x4 sample_4c(float4 uv) + { + return { + sample_c(uv.xy), + sample_c(uv.zy), + sample_c(uv.xw), + sample_c(uv.zw), + }; + } + + float4 sample_4_index(float4 uv) + { + float4 c; + + // Either GS will send a texture that contains a single alpha channel + // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel + + // Note: texture gather can't be used because of special clamping/wrapping + // Also it doesn't support lod + c.x = sample_c(uv.xy).a; + c.y = sample_c(uv.zy).a; + c.z = sample_c(uv.xw).a; + c.w = sample_c(uv.zw).a; + + uchar4 i = uchar4(c * 255.5f); // Denormalize value + + if (PS_PAL_FMT == 1) + return float4(i & 0xF) / 255.f; + if (PS_PAL_FMT == 2) + return float4(i >> 4) / 255.f; + + // Most textures will hit this code so keep normalized float value + return c; + } + + float4x4 sample_4p(float4 u) + { + return { + sample_p(u.x), + sample_p(u.y), + sample_p(u.z), + sample_p(u.w), + }; + } + + uint fetch_raw_depth() + { + return tex_depth.read(ushort2(in.p.xy)) * 0x1p32f; + } + + float4 fetch_raw_color() + { + if (PS_TEX_IS_FB) + return current_color; + else + return tex.read(ushort2(in.p.xy)); + } + + float4 fetch_c(ushort2 uv) + { + return 
PS_TEX_IS_DEPTH ? tex_depth.read(uv) : tex.read(uv); + } + + // MARK: Depth sampling + + ushort2 clamp_wrap_uv_depth(ushort2 uv) + { + ushort2 uv_out = uv; + // Keep the full precision + // It allow to multiply the ScalingFactor before the 1/16 coeff + ushort4 mask = ushort4(cb.uv_msk_fix) << 4; + + if (PS_WMS == PS_WMT) + { + if (PS_WMS == 2) + uv_out = clamp(uv, mask.xy, mask.zw); + else if (PS_WMS == 3) + uv_out = (uv & mask.xy) | mask.zw; + } + else + { + if (PS_WMS == 2) + uv_out.x = clamp(uv.x, mask.x, mask.z); + else if (PS_WMS == 3) + uv_out.x = (uv.x & mask.x) | mask.z; + + if (PS_WMT == 2) + uv_out.y = clamp(uv.y, mask.y, mask.w); + else if (PS_WMT == 3) + uv_out.y = (uv.y & mask.y) | mask.w; + } + + return uv_out; + } + + float4 sample_depth(float2 st) + { + float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f)); + ushort2 uv = ushort2(uv_f); + + float4 t = float4(0); + if (PS_TALES_OF_ABYSS_HLE) + { + // Warning: UV can't be used in channel effect + ushort depth = fetch_raw_depth(); + // Convert msb based on the palette + t = palette.read(ushort2((depth >> 8) & 0xFF, 0)) * 255.f; + } + else if (PS_URBAN_CHAOS_HLE) + { + // Depth buffer is read as a RGB5A1 texture. The game try to extract the green channel. + // So it will do a first channel trick to extract lsb, value is right-shifted. + // Then a new channel trick to extract msb which will shifted to the left. + // OpenGL uses a FLOAT32 format for the depth so it requires a couple of conversion. + // To be faster both steps (msb&lsb) are done in a single pass. 
+ + // Warning: UV can't be used in channel effect + ushort depth = fetch_raw_depth(); + + // Convert lsb based on the palette + t = palette.read(ushort2(depth & 0xFF, 0)) * 255.f; + + // Msb is easier + float green = float((depth >> 8) & 0xFF) * 36.f; + green = min(green, 255.0f); + + t.g += green; + } + else if (PS_DEPTH_FMT == 1) + { + t = convert_depth32_rgba8(fetch_c(uv).r); + } + else if (PS_DEPTH_FMT == 2) + { + t = convert_depth16_rgba8(fetch_c(uv).r); + } + else if (PS_DEPTH_FMT == 3) + { + t = fetch_c(uv) * 255.f; + } + + if (PS_AEM_FMT == FMT_24) + t.a = (!PS_AEM || any(bool3(t.rgb))) ? 255.f * cb.ta.x : 0.f; + else if (PS_AEM_FMT == FMT_16) + t.a = t.a >= 128.f ? 255.f * cb.ta.y : (!PS_AEM || any(bool3(t.rgb))) ? 255.f * cb.ta.x : 0.f; + + return t; + } + + // MARK: Fetch a Single Channel + + float4 fetch_red() + { + float rt = PS_TEX_IS_DEPTH ? float(fetch_raw_depth() & 0xFF) / 255.f : fetch_raw_color().r; + return sample_p(rt) * 255.f; + } + + float4 fetch_green() + { + float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 8) & 0xFF) / 255.f : fetch_raw_color().g; + return sample_p(rt) * 255.f; + } + + float4 fetch_blue() + { + float rt = PS_TEX_IS_DEPTH ? 
float((fetch_raw_depth() >> 16) & 0xFF) / 255.f : fetch_raw_color().b; + return sample_p(rt) * 255.f; + } + + float4 fetch_alpha() + { + return sample_p(fetch_raw_color().a) * 255.f; + } + + float4 fetch_rgb() + { + float4 rt = fetch_raw_color(); + return float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1) * 255.f; + } + + float4 fetch_gXbY() + { + if (PS_TEX_IS_DEPTH) + { + uint depth = fetch_raw_depth(); + uint bg = (depth >> (8 + cb.channel_shuffle.green_shift)) & 0xFF; + return float4(bg); + } + else + { + uchar4 rt = uchar4(fetch_raw_color() * 255.5f); + uchar green = (rt.g >> cb.channel_shuffle.green_shift) & cb.channel_shuffle.green_mask; + uchar blue = (rt.b >> cb.channel_shuffle.blue_shift) & cb.channel_shuffle.blue_mask; + return float4(green | blue); + } + } + + float4 sample_color(float2 st) + { + if (PS_TCOFFSETHACK) + st += cb.tc_offset; + + float4 t; + float4x4 c; + float2 dd; + + if (!PS_LTF && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2) + { + c[0] = sample_c(st); + } + else + { + float4 uv; + if (PS_LTF) + { + uv = st.xyxy + cb.half_texel; + dd = fract(uv.xy * cb.wh.zw); + if (!FST) + { + // Background in Shin Megami Tensei Lucifers + // I suspect that uv isn't a standard number, so fract is outside of the [0;1] range + dd = saturate(dd); + } + } + else + { + uv = st.xyxy; + } + + uv = clamp_wrap_uv(uv); + + if (PS_PAL_FMT != 0) + c = sample_4p(sample_4_index(uv)); + else + c = sample_4c(uv); + } + + for (int i = 0; i < 4; i++) + { + if (PS_AEM_FMT == FMT_24) + c[i].a = !PS_AEM || any(bool3(c[i].rgb)) ? cb.ta.x : 0.f; + else if (PS_AEM_FMT == FMT_16) + c[i].a = c[i].a >= 0.5 ? cb.ta.y : !PS_AEM || any(bool3(c[i].rgb)) ? cb.ta.x : 0.f; + } + + if (PS_LTF) + t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); + else + t = c[0]; + + // The 0.05f helps to fix the overbloom of sotc + // I think the issue is related to the rounding of texture coodinate. 
The linear (from fixed unit) + // interpolation could be slightly below the correct one. + return trunc(t * 255.f + 0.05f); + } + + float4 tfx(float4 T, float4 C) + { + float4 C_out; + float4 FxT = trunc(trunc(C) * T / 128.f); + if (PS_TFX == 0) + C_out = FxT; + else if (PS_TFX == 1) + C_out = T; + else if (PS_TFX == 2) + C_out = float4(FxT.rgb, T.a) + C.a; + else if (PS_TFX == 3) + C_out = float4(FxT.rgb + C.a, T.a); + else + C_out = C; + + if (!PS_TCC) + C_out.a = C.a; + + // Clamp only when it is useful + if (PS_TFX == 0 || PS_TFX == 2 || PS_TFX == 3) + C_out = min(C_out, 255.f); + + return C_out; + } + + bool atst(float4 C) + { + float a = C.a; + switch (PS_ATST) + { + case 0: + break; // Nothing to do + case 1: + if (a > cb.aref) + return false; + break; + case 2: + if (a < cb.aref) + return false; + break; + case 3: + if (abs(a - cb.aref) > 0.5f) + return false; + break; + case 4: + if (abs(a - cb.aref) < 0.5f) + return false; + break; + } + return true; + } + + void fog(thread float4& C, float f) + { + if (PS_FOG) + C.rgb = trunc(mix(cb.fog_color, C.rgb, f)); + } + + float4 ps_color() + { + float2 st, st_int; + if (!FST && PS_INVALID_TEX0) + { + st = (in.t.xy * cb.wh.xy) / (in.t.w * cb.wh.zw); + } + else if (!FST) + { + st = in.t.xy / in.t.w; + st_int = in.ti.zw / in.t.w; + } + else + { + // Note: xy are normalized coordinates + st = in.ti.xy; + st_int = in.ti.zw; + } + + float4 T; + if (PS_CHANNEL == 1) + T = fetch_red(); + else if (PS_CHANNEL == 2) + T = fetch_green(); + else if (PS_CHANNEL == 3) + T = fetch_blue(); + else if (PS_CHANNEL == 4) + T = fetch_alpha(); + else if (PS_CHANNEL == 5) + T = fetch_rgb(); + else if (PS_CHANNEL == 6) + T = fetch_gXbY(); + else if (PS_DEPTH_FMT != 0) + T = sample_depth(st_int); + else + T = sample_color(st); + + float4 C = tfx(T, IIP ? 
in.c : in.fc); + if (!atst(C)) + discard_fragment(); + fog(C, in.t.z); + + return C; + } + + void ps_fbmask(thread float4& C) + { + if (PS_FBMASK) + C = float4((uint4(C) & ~cb.fbmask) | (uint4(current_color * 255.5) & cb.fbmask)); + } + + void ps_dither(thread float4& C) + { + if (PS_DITHER == 0) + return; + ushort2 fpos; + if (PS_DITHER == 2) + fpos = ushort2(in.p.xy); + else + fpos = ushort2(in.p.xy / float2(SCALING_FACTOR)); + C.rgb += cb.dither_matrix[fpos.y & 3][fpos.x & 3]; + } + + void ps_color_clamp_wrap(thread float4& C) + { + // When dithering the bottom 3 bits become meaningless and cause lines in the picture so we need to limit the color depth on dithered items + if (!SW_BLEND && !PS_DITHER) + return; + + // Correct the Color value based on the output format + if (!PS_COLCLIP && !PS_HDR) + C.rgb = clamp(C.rgb, 0.f, 255.f); // Standard Clamp + + // FIXME rouding of negative float? + // compiler uses trunc but it might need floor + + // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy + // GS: Color = 1, Alpha = 255 => output 1 + // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 + if (PS_DFMT == FMT_16 && (PS_HDR || !PS_BLEND_MIX)) + // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania + C.rgb = float3(short3(C.rgb) & 0xF8); + else if (PS_COLCLIP && !PS_HDR) + C.rgb = float3(short3(C.rgb) & 0xFF); + } + + template + static T pick(uint selector, T zero, T one, T two) + { + return selector == 0 ? zero : selector == 1 ? one : two; + } + + void ps_blend(thread float4& Color, float As) + { + if (SW_BLEND) + { + + float Ad = PS_DFMT == FMT_24 ? 
1.f : trunc(current_color.a * 255.5f) / 128.f; + + float3 Cd = trunc(current_color.rgb * 255.5f); + float3 Cs = Color.rgb; + + float3 A = pick(PS_BLEND_A, Cs, Cd, float3(0.f)); + float3 B = pick(PS_BLEND_B, Cs, Cd, float3(0.f)); + float C = pick(PS_BLEND_C, As, Ad, cb.alpha_fix); + float3 D = pick(PS_BLEND_D, Cs, Cd, float3(0.f)); + + if (PS_BLEND_MIX) + C = min(C, 1.f); + + if (PS_BLEND_A == PS_BLEND_B) + Color.rgb = D; + else + Color.rgb = trunc((A - B) * C + D); + + if (PS_PABE) + Color.rgb = (As >= 1.f) ? Color.rgb : Cs; + } + else + { + // Needed for Cd * (As/Ad/F + 1) blending mdoes + if (PS_CLR_HW == 1 || PS_CLR_HW == 5) + { + Color.rgb = 255.f; + } + else if (PS_CLR_HW == 2 || PS_CLR_HW == 4) + { + float Alpha = PS_BLEND_C == 2 ? cb.alpha_fix : As; + Color.rgb = saturate(Alpha - 1.f) * 255.f; + } + else if (PS_CLR_HW == 3) + { + // Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad + // Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value + Color.rgb *= (255.f / 128.f); + } + } + } + + MainPSOut ps_main() + { + MainPSOut out = {}; + + if (PS_SCANMSK & 2) + { + if ((uint(in.p.y) & 1) == (PS_SCANMSK & 1)) + discard_fragment(); + } + + if (PS_DATE >= 5) + { + // 1 => DATM == 0, 2 => DATM == 1 + float rt_a = PS_WRITE_RG ? current_color.g : current_color.a; + bool bad = (PS_DATE & 3) == 1 ? (rt_a > 0.5) : (rt_a < 0.5); + + if (bad) + discard_fragment(); + } + + if (PS_DATE == 3) + { + float stencil_ceil = prim_id_tex.read(uint2(in.p.xy)).r; + // Note prim_id == stencil_ceil will be the primitive that will update + // the bad alpha value so we must keep it. + if (float(prim_id) > stencil_ceil) + discard_fragment(); + } + + float4 C = ps_color(); + + if (PS_SHUFFLE) + { + uchar4 denorm_c = uchar4(C); + uchar2 denorm_TA = uchar2(cb.ta * 255.5f); + + C.rb = PS_READ_BA ? C.bb : C.rr; + if (PS_READ_BA) + C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + else + C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? 
denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + } + + // Must be done before alpha correction + float alpha_blend = SW_AD_TO_HW ? (PS_DFMT == FMT_24 ? 1.f : trunc(current_color.a * 255.5f) / 128.f) : (C.a / 128.f); + + if (PS_DFMT == FMT_16) + { + float A_one = 128.f; + C.a = (PS_FBA) ? A_one : step(128.f, C.a) * A_one; + } + else if (PS_DFMT == FMT_32 && PS_FBA) + { + if (C.a < 128.f) + C.a += 128.f; + } + + // Get first primitive that will write a failing alpha value + if (PS_DATE == 1) + { + // DATM == 0, Pixel with alpha equal to 1 will failed (128-255) + out.c0 = C.a > 127.5f ? float(prim_id) : FLT_MAX; + return out; + } + else if (PS_DATE == 2) + { + // DATM == 1, Pixel with alpha equal to 0 will failed (0-127) + out.c0 = C.a < 127.5f ? float(prim_id) : FLT_MAX; + return out; + } + + ps_blend(C, alpha_blend); + + ps_dither(C); + + // Color clamp/wrap needs to be done after sw blending and dithering + ps_color_clamp_wrap(C); + + ps_fbmask(C); + + if (PS_COLOR0) + out.c0 = C / 255.f; + if (PS_COLOR0 && PS_ONLY_ALPHA) + out.c0.rgb = 0; + if (PS_COLOR1) + out.c1 = alpha_blend; + if (PS_ZCLAMP) + out.depth = min(in.p.z, cb.max_depth); + + return out; + } +}; + +#if FBFETCH_SUPPORT +fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) +{ + return in * 2; +} + +constant bool NEEDS_RT_TEX = NEEDS_RT && !HAS_FBFETCH; +constant bool NEEDS_RT_FBF = NEEDS_RT && HAS_FBFETCH; +#else +constant bool NEEDS_RT_TEX = NEEDS_RT; +#endif + +fragment MainPSOut ps_main( + MainPSIn in [[stage_in]], + constant GSMTLMainPSUniform& cb [[buffer(GSMTLBufferIndexHWUniforms)]], + sampler s [[sampler(0)]], +#if PRIMID_SUPPORT + uint primid [[primitive_id, function_constant(NEEDS_PRIMID)]], +#endif +#if FBFETCH_SUPPORT + float4 rt_fbf [[color(0), raster_order_group(0), function_constant(NEEDS_RT_FBF)]], +#endif + texture2d tex [[texture(GSMTLTextureIndexTex), function_constant(PS_TEX_IS_COLOR)]], + depth2d depth [[texture(GSMTLTextureIndexTex), 
function_constant(PS_TEX_IS_DEPTH)]], + texture2d palette [[texture(GSMTLTextureIndexPalette), function_constant(PS_HAS_PALETTE)]], + texture2d rt [[texture(GSMTLTextureIndexRenderTarget), function_constant(NEEDS_RT_TEX)]], + texture2d primidtex [[texture(GSMTLTextureIndexPrimIDs), function_constant(PS_PRIM_CHECKING_READ)]]) +{ + PSMain main(in, cb); + main.tex_sampler = s; + if (PS_TEX_IS_COLOR) + main.tex = tex; + else + main.tex_depth = depth; + if (PS_HAS_PALETTE) + main.palette = palette; + if (PS_PRIM_CHECKING_READ) + main.prim_id_tex = primidtex; +#if PRIMID_SUPPORT + if (NEEDS_PRIMID) + main.prim_id = primid; +#endif + + if (NEEDS_RT) + { +#if FBFETCH_SUPPORT + main.current_color = HAS_FBFETCH ? rt_fbf : rt.read(uint2(in.p.xy)); +#else + main.current_color = rt.read(uint2(in.p.xy)); +#endif + } + else + { + main.current_color = 0; + } + + return main.ps_main(); +} + +#if PRIMID_SUPPORT +fragment uint primid_test(uint id [[primitive_id]]) +{ + return id; +} +#endif + +// MARK: Markers for detecting the Metal version a metallib was compiled against + +#if __METAL_VERSION__ >= 210 +kernel void metal_version_21() {} +#endif +#if __METAL_VERSION__ >= 220 +kernel void metal_version_22() {} +#endif +#if __METAL_VERSION__ >= 230 +kernel void metal_version_23() {} +#endif diff --git a/pcsx2/HostDisplay.cpp b/pcsx2/HostDisplay.cpp index 1e5c9055c6..56af502f8b 100644 --- a/pcsx2/HostDisplay.cpp +++ b/pcsx2/HostDisplay.cpp @@ -32,8 +32,19 @@ HostDisplay::~HostDisplay() = default; const char* HostDisplay::RenderAPIToString(RenderAPI api) { - static const char* names[] = {"None", "D3D11", "Vulkan", "OpenGL", "OpenGLES"}; - return (static_cast(api) >= std::size(names)) ? 
names[0] : names[static_cast(api)]; + switch (api) + { +#define CASE(x) case RenderAPI::x: return #x + CASE(None); + CASE(D3D11); + CASE(Metal); + CASE(Vulkan); + CASE(OpenGL); + CASE(OpenGLES); +#undef CASE + default: + return "Unknown"; + } } bool HostDisplay::UsesLowerLeftOrigin() const @@ -122,6 +133,7 @@ std::string HostDisplay::GetFullscreenModeString(u32 width, u32 height, float re #ifdef _WIN32 #include "Frontend/D3D11HostDisplay.h" #endif +#include "GS/Renderers/Metal/GSMetalCPPAccessible.h" std::unique_ptr HostDisplay::CreateDisplayForAPI(RenderAPI api) { diff --git a/pcsx2/HostDisplay.h b/pcsx2/HostDisplay.h index 3388bb134e..adcbe4428f 100644 --- a/pcsx2/HostDisplay.h +++ b/pcsx2/HostDisplay.h @@ -46,6 +46,7 @@ public: { None, D3D11, + Metal, Vulkan, OpenGL, OpenGLES