GS: Add Metal renderer

This commit is contained in:
TellowKrinkle 2021-11-16 05:17:30 -06:00 committed by tellowkrinkle
parent 24b2277206
commit 5ecaa9459d
25 changed files with 5136 additions and 17 deletions

View File

@ -1,4 +1,4 @@
#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK)
#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK) || defined(__METAL_VERSION__)
#ifndef FXAA_GLSL_130
#define FXAA_GLSL_130 0
@ -47,6 +47,8 @@ struct PS_OUTPUT
float4 c : SV_Target0;
};
#elif defined(__METAL_VERSION__)
static constexpr sampler MAIN_SAMPLER(coord::normalized, address::clamp_to_edge, filter::linear);
#endif
/*------------------------------------------------------------------------------
@ -63,6 +65,9 @@ struct PS_OUTPUT
#elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1)
#define FXAA_GATHER4_ALPHA 1
#elif defined(__METAL_VERSION__)
#define FXAA_GATHER4_ALPHA 1
#endif
#if (FXAA_HLSL_5 == 1)
@ -98,6 +103,14 @@ struct FxaaTex { SamplerState smpl; Texture2D tex; };
#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
#endif
#elif defined(__METAL_VERSION__)
#define FxaaTex texture2d<float>
#define FxaaTexTop(t, p) t.sample(MAIN_SAMPLER, p)
#define FxaaTexOff(t, p, o, r) t.sample(MAIN_SAMPLER, p, o)
#define FxaaTexAlpha4(t, p) t.gather(MAIN_SAMPLER, p, 0, component::w)
#define FxaaTexOffAlpha4(t, p, o) t.gather(MAIN_SAMPLER, p, o, component::w)
#define FxaaDiscard discard_fragment()
#define FxaaSat(x) saturate(x)
#endif
#define FxaaEdgeThreshold 0.063
@ -151,14 +164,8 @@ float3 LinearToRGBGamma(float3 color, float gamma)
return color;
}
float4 PreGammaPass(float4 color, float2 uv0)
float4 PreGammaPass(float4 color)
{
#if (SHADER_MODEL >= 0x400)
color = Texture.Sample(TextureSampler, uv0);
#elif (FXAA_GLSL_130 == 1)
color = texture(TextureSampler, uv0);
#endif
const float GammaConst = 2.233;
color.rgb = RGBGammaToLinear(color.rgb, GammaConst);
color.rgb = LinearToRGBGamma(color.rgb, GammaConst);
@ -483,6 +490,8 @@ float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaS
float4 FxaaPass(float4 FxaaColor, float2 uv0)
#elif (SHADER_MODEL >= 0x400)
float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)
#elif defined(__METAL_VERSION__)
float4 FxaaPass(float4 FxaaColor, float2 uv0, texture2d<float> tex)
#endif
{
@ -498,6 +507,9 @@ float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)
#elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1)
vec2 PixelSize = textureSize(TextureSampler, 0);
FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
#elif defined(__METAL_VERSION__)
float2 PixelSize = float2(tex.get_width(), tex.get_height());
FxaaColor = FxaaPixelShader(uv0, tex, 1.f/PixelSize, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
#endif
return FxaaColor;
@ -511,7 +523,7 @@ float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)
void main()
{
vec4 color = texture(TextureSampler, PSin_t);
color = PreGammaPass(color, PSin_t);
color = PreGammaPass(color);
color = FxaaPass(color, PSin_t);
SV_Target0 = color;
@ -524,7 +536,7 @@ PS_OUTPUT ps_main(VS_OUTPUT input)
float4 color = Texture.Sample(TextureSampler, input.t);
color = PreGammaPass(color, input.t);
color = PreGammaPass(color);
color = FxaaPass(color, input.t);
output.c = color;
@ -532,6 +544,7 @@ PS_OUTPUT ps_main(VS_OUTPUT input)
return output;
}
// Metal main function is in fxaa.metal
#endif
#endif

View File

@ -305,9 +305,14 @@ endif()
# MacOS-specific things
#-------------------------------------------------------------------------------
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.13)
if(NOT CMAKE_GENERATOR MATCHES "Xcode")
# Assume Xcode builds aren't being used for distribution
# Helpful because Xcode builds don't build multiple metallibs for different macOS versions
# Also helpful because Xcode's interactive shader debugger requires apps be built for the latest macOS
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.13)
endif()
if (APPLE AND ${CMAKE_OSX_DEPLOYMENT_TARGET} VERSION_LESS 10.14 AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 9)
if (APPLE AND CMAKE_OSX_DEPLOYMENT_TARGET AND "${CMAKE_OSX_DEPLOYMENT_TARGET}" VERSION_LESS 10.14 AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 9)
# Older versions of the macOS stdlib don't have operator new(size_t, align_val_t)
# Disable use of them with this flag
# Not great, but also no worse than what we were getting before we turned on C++17

View File

@ -86,6 +86,7 @@ target_sources(common PRIVATE
MemcpyFast.h
MemsetFast.inl
MD5Digest.h
MRCHelpers.h
Path.h
PageFaultSource.h
PrecompiledHeader.h
@ -185,8 +186,8 @@ elseif(APPLE)
GL/ContextAGL.h
)
set_source_files_properties(GL/ContextAGL.mm PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
target_compile_options(common PUBLIC -fobjc-arc)
target_link_options(common PUBLIC -fobjc-link-runtime)
target_compile_options(common PRIVATE -fobjc-arc)
target_link_options(common PRIVATE -fobjc-link-runtime)
else()
if(X11_API OR WAYLAND_API)
target_sources(common PRIVATE

97
common/MRCHelpers.h Normal file
View File

@ -0,0 +1,97 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __OBJC__
#error This header is for use with Objective-C++ only.
#endif
#if __has_feature(objc_arc)
#error This file is for manual reference counting! Compile without -fobjc-arc
#endif
#pragma once
#include <cstddef>
#include <utility>
/// Managed Obj-C pointer for manual-reference-counting (non-ARC) code.
/// Holds one reference to the wrapped object and sends it `release` on destruction.
/// Instances are created through Transfer()/Retain() so the caller always states
/// whether the incoming pointer is already owned or needs an extra retain.
template <typename T>
class MRCOwned
{
	T ptr;
	/// Adopts `ptr` as-is without retaining it. Private: use Transfer() or Retain().
	MRCOwned(T ptr): ptr(ptr) {}
public:
	/// Creates an empty (nil) holder.
	MRCOwned(): ptr(nullptr) {}
	/// Creates an empty (nil) holder; allows `MRCOwned<T> x = nullptr;`.
	MRCOwned(std::nullptr_t): ptr(nullptr) {}
	/// Takes over the reference held by `other` and leaves `other` empty.
	/// noexcept so standard containers move (rather than copy + retain/release) on reallocation.
	MRCOwned(MRCOwned&& other) noexcept
		: ptr(other.ptr)
	{
		other.ptr = nullptr;
	}
	/// Shares the object with `other` by retaining it once more.
	MRCOwned(const MRCOwned& other)
		: ptr(other.ptr)
	{
		[ptr retain];
	}
	/// Releases the held reference, if any.
	~MRCOwned()
	{
		if (ptr)
			[ptr release];
	}
	/// Implicit access to the raw pointer so the holder can be used in message sends.
	operator T() const { return ptr; }
	/// Copy-assignment: retains the incoming object before releasing the current one,
	/// which keeps self-assignment safe.
	MRCOwned& operator=(const MRCOwned& other)
	{
		[other.ptr retain];
		if (ptr)
			[ptr release];
		ptr = other.ptr;
		return *this;
	}
	/// Move-assignment: swaps pointers, so the previously held object is released
	/// when `other` is destroyed.
	MRCOwned& operator=(MRCOwned&& other) noexcept
	{
		std::swap(ptr, other.ptr);
		return *this;
	}
	/// Releases the held reference and becomes empty.
	void Reset()
	{
		[ptr release];
		ptr = nullptr;
	}
	/// Returns the raw pointer without changing its retain count.
	T Get() const { return ptr; }
	/// Wraps `ptr` without retaining it (the holder takes over an existing reference).
	static MRCOwned Transfer(T ptr)
	{
		return MRCOwned(ptr);
	}
	/// Retains `ptr` and wraps it (the caller keeps whatever reference it had).
	static MRCOwned Retain(T ptr)
	{
		[ptr retain];
		return MRCOwned(ptr);
	}
};
/// Take ownership of an Obj-C pointer (equivalent to __bridge_transfer).
/// The pointer is wrapped without an additional retain.
template <typename T>
static inline MRCOwned<T> MRCTransfer(T ptr)
{
	using Owned = MRCOwned<T>;
	return Owned::Transfer(ptr);
}
/// Retain an Obj-C pointer (equivalent to __bridge).
/// The pointer is retained once and the resulting reference is held by the wrapper.
template <typename T>
static inline MRCOwned<T> MRCRetain(T ptr)
{
	using Owned = MRCOwned<T>;
	return Owned::Retain(ptr);
}

View File

@ -789,6 +789,14 @@ if(USE_VULKAN)
)
endif()
set(pcsx2GSMetalShaders
GS/Renderers/Metal/convert.metal
GS/Renderers/Metal/merge.metal
GS/Renderers/Metal/interlace.metal
GS/Renderers/Metal/tfx.metal
GS/Renderers/Metal/fxaa.metal
)
if(NOT PCSX2_CORE)
list(APPEND pcsx2GSSources
GS/Window/GSwxDialog.cpp
@ -1004,6 +1012,26 @@ if(WIN32)
list(APPEND pcsx2FrontendHeaders
Frontend/D3D11HostDisplay.h
)
elseif(APPLE)
list(APPEND pcsx2GSSources
GS/Renderers/Metal/GSDeviceMTL.mm
GS/Renderers/Metal/GSMTLDeviceInfo.mm
GS/Renderers/Metal/GSTextureMTL.mm
)
list(APPEND pcsx2GSHeaders
GS/Renderers/Metal/GSDeviceMTL.h
GS/Renderers/Metal/GSMetalCPPAccessible.h
GS/Renderers/Metal/GSMTLDeviceInfo.h
GS/Renderers/Metal/GSMTLSharedHeader.h
GS/Renderers/Metal/GSMTLShaderCommon.h
GS/Renderers/Metal/GSTextureMTL.h
)
list(APPEND pcsx2FrontendSources
Frontend/MetalHostDisplay.mm
)
list(APPEND pcsx2FrontendHeaders
Frontend/MetalHostDisplay.h
)
endif()
if(PCSX2_CORE)
@ -1710,6 +1738,49 @@ if(GETTEXT_FOUND AND NOT NO_TRANSLATION AND NOT PCSX2_CORE)
endif()
if (APPLE)
find_library(METAL_LIBRARY Metal)
target_link_libraries(PCSX2 PRIVATE ${METAL_LIBRARY})
if(CMAKE_GENERATOR MATCHES "Xcode")
# If we're generating an xcode project, you can just add the shaders to the main pcsx2 target and xcode will deal with them properly
# This will make sure xcode supplies code completion, etc (if you use a custom command, it won't)
set_target_properties(PCSX2 PROPERTIES
XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE
)
foreach(shader IN LISTS pcsx2GSMetalShaders)
target_sources(PCSX2 PRIVATE ${shader})
set_source_files_properties(${shader} PROPERTIES LANGUAGE METAL)
endforeach()
else()
# generateMetallib(<std> <target> <outputName>)
# Compiles every shader in pcsx2GSMetalShaders to an .air file with the given Metal
# language standard and target triple, links them into <outputName>.metallib in the
# current binary dir, and registers that metallib as a resource via pcsx2_resource().
function(generateMetallib std target outputName)
	# Output path does not depend on the individual shader, so compute it once up front
	# (it also stays defined if the shader list happens to be empty).
	set(metallib ${CMAKE_CURRENT_BINARY_DIR}/${outputName}.metallib)
	set(pcsx2GSMetalShaderOut)
	set(flags
		-ffast-math
		$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-gline-tables-only>
		$<$<NOT:$<CONFIG:Release,MinSizeRel>>:-MO>
	)
	foreach(shader IN LISTS pcsx2GSMetalShaders)
		set(shaderOut ${CMAKE_CURRENT_BINARY_DIR}/${outputName}/${shader}.air)
		list(APPEND pcsx2GSMetalShaderOut ${shaderOut})
		# The shader path contains subdirectories, so make sure the output directory exists
		get_filename_component(shaderDir ${shaderOut} DIRECTORY)
		add_custom_command(OUTPUT ${shaderOut}
			COMMAND ${CMAKE_COMMAND} -E make_directory ${shaderDir}
			COMMAND xcrun metal ${flags} -std=${std} -target ${target} -o ${shaderOut} -c ${CMAKE_CURRENT_SOURCE_DIR}/${shader}
			DEPENDS ${shader} GS/Renderers/Metal/GSMTLSharedHeader.h GS/Renderers/Metal/GSMTLShaderCommon.h
		)
	endforeach()
	add_custom_command(OUTPUT ${metallib}
		COMMAND xcrun metallib -o ${metallib} ${pcsx2GSMetalShaderOut}
		DEPENDS ${pcsx2GSMetalShaderOut}
	)
	pcsx2_resource(${metallib} ${CMAKE_CURRENT_BINARY_DIR})
endfunction()
generateMetallib(macos-metal2.0 air64-apple-macos10.13 default)
generateMetallib(macos-metal2.2 air64-apple-macos10.15 Metal22)
generateMetallib(macos-metal2.3 air64-apple-macos11.0 Metal23)
endif()
# MacOS defaults to having a maximum protection of the __DATA segment of rw (non-executable)
# We have a bunch of page-sized arrays in bss that we use for jit
# Obviously not being able to make those arrays executable would be a problem
@ -1766,6 +1837,7 @@ source_group(System/Ps2/DEV9 REGULAR_EXPRESSION DEV9/*)
source_group(System/Ps2/PAD FILES ${pcsx2PADSources} ${pcsx2PADHeaders})
source_group(System/Ps2/SPU2 REGULAR_EXPRESSION SPU2/*)
source_group(System/Ps2/USB REGULAR_EXPRESSION USB/*)
source_group(System/Ps2/GS/Renderers/Metal REGULAR_EXPRESSION GS/Renderers/Metal/*)
# Generated resource files
source_group(Resources/GUI FILES ${pcsx2GuiResources})

View File

@ -0,0 +1,84 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "HostDisplay.h"
#ifndef __OBJC__
#error "This header is for use with Objective-C++ only.
#endif
#ifdef __APPLE__
#include "GS/Renderers/Metal/GSMTLDeviceInfo.h"
#include <AppKit/AppKit.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
class MetalHostDisplay final : public HostDisplay
{
MRCOwned<NSView*> m_view;
MRCOwned<CAMetalLayer*> m_layer;
GSMTLDevice m_dev;
MRCOwned<id<MTLCommandQueue>> m_queue;
MRCOwned<id<MTLTexture>> m_font_tex;
MRCOwned<id<CAMetalDrawable>> m_current_drawable;
MRCOwned<MTLRenderPassDescriptor*> m_pass_desc;
u32 m_capture_start_frame;
void AttachSurfaceOnMainThread();
void DetachSurfaceOnMainThread();
public:
MetalHostDisplay();
~MetalHostDisplay();
RenderAPI GetRenderAPI() const override;
void* GetRenderDevice() const override;
void* GetRenderContext() const override;
void* GetRenderSurface() const override;
bool HasRenderDevice() const override;
bool HasRenderSurface() const override;
bool CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, VsyncMode vsync, bool threaded_presentation, bool debug_device) override;
bool InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device) override;
bool MakeRenderContextCurrent() override;
bool DoneRenderContextCurrent() override;
void DestroyRenderDevice() override;
void DestroyRenderSurface() override;
bool ChangeRenderWindow(const WindowInfo& wi) override;
bool SupportsFullscreen() const override;
bool IsFullscreen() override;
bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override;
AdapterAndModeList GetAdapterAndModeList() override;
std::string GetDriverInfo() const override;
void ResizeRenderWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override;
std::unique_ptr<HostDisplayTexture> CreateTexture(u32 width, u32 height, const void* data, u32 data_stride, bool dynamic = false) override;
void UpdateTexture(id<MTLTexture> texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride);
void UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride) override;
bool BeginPresent(bool frame_skip) override;
void EndPresent() override;
void SetVSync(VsyncMode mode) override;
bool CreateImGuiContext() override;
void DestroyImGuiContext() override;
bool UpdateImGuiFontTexture() override;
bool GetHostRefreshRate(float* refresh_rate) override;
};
#endif

View File

@ -0,0 +1,410 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "MetalHostDisplay.h"
#include "GS/Renderers/Metal/GSMetalCPPAccessible.h"
#include "GS/Renderers/Metal/GSDeviceMTL.h"
#include <imgui.h>
#ifdef __APPLE__
/// HostDisplayTexture that keeps a Metal texture alive together with its dimensions.
class MetalHostDisplayTexture final : public HostDisplayTexture
{
	MRCOwned<id<MTLTexture>> m_tex;
	u32 m_width;
	u32 m_height;

public:
	/// Takes over the texture reference held by `tex` and records the given size.
	MetalHostDisplayTexture(MRCOwned<id<MTLTexture>> tex, u32 width, u32 height)
		: m_tex(std::move(tex))
		, m_width(width)
		, m_height(height)
	{
	}

	/// The handle is the id<MTLTexture> itself, bridged to void* without changing ownership.
	void* GetHandle() const override
	{
		return (__bridge void*)m_tex;
	}

	u32 GetWidth() const override
	{
		return m_width;
	}

	u32 GetHeight() const override
	{
		return m_height;
	}
};
/// Factory: allocates a new MetalHostDisplay with `new`; the caller receives the raw owning pointer.
HostDisplay* MakeMetalHostDisplay()
{
return new MetalHostDisplay();
}
/// Nothing to do at construction; the device and surface are set up in CreateRenderDevice.
MetalHostDisplay::MetalHostDisplay()
{
}
/// No explicit teardown here; members are released by their own destructors.
MetalHostDisplay::~MetalHostDisplay()
{
}
/// Builds an adapter list containing the name of every device reported by MTLCopyAllDevices().
/// Only adapter names are filled in by this function.
HostDisplay::AdapterAndModeList GetMetalAdapterAndModeList()
{ @autoreleasepool {
	HostDisplay::AdapterAndModeList result;
	// MTLCopyAllDevices returns an owned array, so adopt it instead of retaining again
	auto all_devices = MRCTransfer(MTLCopyAllDevices());
	for (id<MTLDevice> device in all_devices.Get())
	{
		const char* device_name = [[device name] UTF8String];
		result.adapter_names.push_back(device_name);
	}
	return result;
}}
/// Runs `fn` on the main thread and waits for it to finish.
/// When already on the main thread `fn` is called directly; otherwise it is
/// dispatched synchronously to the main queue.
template <typename Fn>
static void OnMainThread(Fn&& fn)
{
	if (![NSThread isMainThread])
	{
		dispatch_sync(dispatch_get_main_queue(), fn);
		return;
	}
	fn();
}
/// Identifies this display as the Metal backend.
HostDisplay::RenderAPI MetalHostDisplay::GetRenderAPI() const
{
return RenderAPI::Metal;
}
// The "device" handed out is a pointer to the GSMTLDevice member (not the id<MTLDevice> itself)
void* MetalHostDisplay::GetRenderDevice() const { return const_cast<void*>(static_cast<const void*>(&m_dev)); }
// The "context" is the command queue, bridged to void* without transferring ownership
void* MetalHostDisplay::GetRenderContext() const { return (__bridge void*)m_queue; }
// The "surface" is the CAMetalLayer, bridged to void* without transferring ownership
void* MetalHostDisplay::GetRenderSurface() const { return (__bridge void*)m_layer; }
// Device presence is whatever GSMTLDevice::IsOk() reports
bool MetalHostDisplay::HasRenderDevice() const { return m_dev.IsOk(); }
// A surface exists while m_layer is non-nil
bool MetalHostDisplay::HasRenderSurface() const { return static_cast<bool>(m_layer);}
/// Retains the NSView from m_window_info.window_handle and installs m_layer as its layer.
/// Asserts that it is called on the main thread.
void MetalHostDisplay::AttachSurfaceOnMainThread()
{
ASSERT([NSThread isMainThread]);
// window_handle is treated as an NSView*; keep our own reference to it
m_view = MRCRetain((__bridge NSView*)m_window_info.window_handle);
[m_view setWantsLayer:YES];
[m_view setLayer:m_layer];
}
/// Removes the layer from m_view, turns layer backing off again and drops our reference to the view.
/// Asserts that it is called on the main thread.
void MetalHostDisplay::DetachSurfaceOnMainThread()
{
ASSERT([NSThread isMainThread]);
[m_view setLayer:nullptr];
[m_view setWantsLayer:NO];
m_view = nullptr;
}
/// Selects a Metal device, creates the command queue and present pass descriptor,
/// and attaches a new CAMetalLayer to the window.
/// @param wi Window to render into; stored in m_window_info.
/// @param adapter_name Name of the desired device; the system default is used if no device matches.
/// @param vsync Initial vsync mode, applied via SetVSync once the layer exists.
/// @param threaded_presentation Not used by this implementation.
/// @param debug_device Not used by this implementation.
/// @return true if both the device and the command queue were created.
bool MetalHostDisplay::CreateRenderDevice(const WindowInfo& wi, std::string_view adapter_name, VsyncMode vsync, bool threaded_presentation, bool debug_device)
{ @autoreleasepool {
m_window_info = wi;
pxAssertRel(!m_dev.dev, "Device already created!");
// string_view is not guaranteed to be null terminated, so copy it before handing it to NSString
std::string null_terminated_adapter_name(adapter_name);
NSString* ns_adapter_name = [NSString stringWithUTF8String:null_terminated_adapter_name.c_str()];
auto devs = MRCTransfer(MTLCopyAllDevices());
// No early exit: if several devices share the requested name, the last match is kept
for (id<MTLDevice> dev in devs.Get())
{
if ([[dev name] isEqualToString:ns_adapter_name])
m_dev = GSMTLDevice(MRCRetain(dev));
}
if (!m_dev.dev)
{
// Only warn when a specific adapter was requested; an empty name just means "default"
if (!adapter_name.empty())
Console.Warning("Metal: Couldn't find adapter %s, using default", null_terminated_adapter_name.c_str());
m_dev = GSMTLDevice(MRCTransfer(MTLCreateSystemDefaultDevice()));
}
m_queue = MRCTransfer([m_dev.dev newCommandQueue]);
// Present pass: clear to transparent black, keep the rendered result
m_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]);
[m_pass_desc colorAttachments][0].loadAction = MTLLoadActionClear;
[m_pass_desc colorAttachments][0].clearColor = MTLClearColorMake(0, 0, 0, 0);
[m_pass_desc colorAttachments][0].storeAction = MTLStoreActionStore;
// Optional GPU trace capture: MTL_CAPTURE=<frame number> (0 or unset disables it)
m_capture_start_frame = 0;
if (char* env = getenv("MTL_CAPTURE"))
{
m_capture_start_frame = atoi(env);
}
if (m_capture_start_frame)
{
Console.WriteLn("Metal will capture frame %u", m_capture_start_frame);
}
if (m_dev.IsOk() && m_queue)
{
// Layer creation and view attachment are done on the main thread
OnMainThread([this]
{
m_layer = MRCRetain([CAMetalLayer layer]);
[m_layer setDrawableSize:CGSizeMake(m_window_info.surface_width, m_window_info.surface_height)];
[m_layer setDevice:m_dev.dev];
AttachSurfaceOnMainThread();
});
SetVSync(vsync);
return true;
}
else
return false;
}}
/// No additional initialization is performed here; both parameters are unused and the call always succeeds.
bool MetalHostDisplay::InitializeRenderDevice(std::string_view shader_cache_directory, bool debug_device)
{
return true;
}
// These are no-ops in this backend and always report success
bool MetalHostDisplay::MakeRenderContextCurrent() { return true; }
bool MetalHostDisplay::DoneRenderContextCurrent() { return true; }
/// Tears down the surface first, then drops the command queue and resets the device.
void MetalHostDisplay::DestroyRenderDevice()
{
DestroyRenderSurface();
m_queue = nullptr;
m_dev.Reset();
}
/// Detaches the layer from its view (on the main thread) and drops our reference to it.
/// Does nothing if there is no layer.
void MetalHostDisplay::DestroyRenderSurface()
{
if (!m_layer)
return;
OnMainThread([this]{ DetachSurfaceOnMainThread(); });
m_layer = nullptr;
}
/// Moves the existing layer from the current view to the view described by `wi`.
/// Always returns true.
bool MetalHostDisplay::ChangeRenderWindow(const WindowInfo& wi)
{
// `wi` is captured by reference; the work is run through OnMainThread before this function returns
OnMainThread([this, &wi]
{
DetachSurfaceOnMainThread();
m_window_info = wi;
AttachSurfaceOnMainThread();
});
return true;
}
// Fullscreen switching is not handled by this backend: all three calls report false
bool MetalHostDisplay::SupportsFullscreen() const { return false; }
bool MetalHostDisplay::IsFullscreen() { return false; }
bool MetalHostDisplay::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) { return false; }
/// Forwards to the free function GetMetalAdapterAndModeList().
HostDisplay::AdapterAndModeList MetalHostDisplay::GetAdapterAndModeList()
{
return GetMetalAdapterAndModeList();
}
/// Produces a multi-line, human readable summary of the device: its Obj-C description
/// followed by one line per entry in m_dev.features.
std::string MetalHostDisplay::GetDriverInfo() const
{ @autoreleasepool {
	// Maps a feature flag to the text shown to the user
	const auto support_text = [](bool supported)
	{
		return std::string(supported ? "Supported" : "Unsupported");
	};
	const auto& features = m_dev.features;

	std::string info([[m_dev.dev description] UTF8String]);
	info += "\n Texture Swizzle: " + support_text(features.texture_swizzle);
	info += "\n Unified Memory: " + support_text(features.unified_memory);
	info += "\n Framebuffer Fetch: " + support_text(features.framebuffer_fetch);
	info += "\n Primitive ID: " + support_text(features.primid);
	info += "\n Shader Version: " + std::string(to_string(features.shader_version));
	info += "\n Max Texture Size: " + std::to_string(features.max_texsize);
	return info;
}}
/// Records the new surface scale and, if the size actually changed, the new size,
/// then resizes the layer's drawable to match.
void MetalHostDisplay::ResizeRenderWindow(s32 new_window_width, s32 new_window_height, float new_window_scale)
{
	// The scale is always stored, even when the dimensions stay the same
	m_window_info.surface_scale = new_window_scale;

	const bool same_width = m_window_info.surface_width == static_cast<u32>(new_window_width);
	const bool same_size = same_width && m_window_info.surface_height == static_cast<u32>(new_window_height);
	if (same_size)
		return;

	m_window_info.surface_width = new_window_width;
	m_window_info.surface_height = new_window_height;

	@autoreleasepool
	{
		const CGSize drawable_size = CGSizeMake(new_window_width, new_window_height);
		[m_layer setDrawableSize:drawable_size];
	}
}
/// Creates a private-storage RGBA8 texture usable for shader reads and optionally fills it.
/// @param width,height Texture dimensions in pixels.
/// @param data Optional initial pixel data; uploaded through UpdateTexture when non-null.
/// @param data_stride Bytes per row of `data`.
/// @param dynamic Not used by this implementation.
/// @return The wrapped texture, or nullptr if Metal failed to create it.
std::unique_ptr<HostDisplayTexture> MetalHostDisplay::CreateTexture(u32 width, u32 height, const void* data, u32 data_stride, bool dynamic)
{ @autoreleasepool {
MTLTextureDescriptor* desc = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
width:width
height:height
mipmapped:false];
[desc setUsage:MTLTextureUsageShaderRead];
[desc setStorageMode:MTLStorageModePrivate];
// new* methods return an owned object, so adopt it rather than retaining again
MRCOwned<id<MTLTexture>> tex = MRCTransfer([m_dev.dev newTextureWithDescriptor:desc]);
if (!tex)
return nullptr; // Something broke yay
[tex setLabel:@"MetalHostDisplay Texture"];
if (data)
UpdateTexture(tex, 0, 0, width, height, data, data_stride);
return std::make_unique<MetalHostDisplayTexture>(std::move(tex), width, height);
}}
/// Uploads a rectangle of pixel data into `texture` using a temporary shared buffer and a blit.
/// @param texture Destination texture; the call is a no-op if nil.
/// @param x,y Top-left corner of the destination rectangle inside the texture.
/// @param width,height Size of the rectangle in pixels.
/// @param data Source pixels, `height` rows of `data_stride` bytes each; the call is a no-op if null.
/// @param data_stride Bytes per source row.
void MetalHostDisplay::UpdateTexture(id<MTLTexture> texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride)
{
	// Widen before multiplying so a large upload cannot wrap around in 32-bit arithmetic
	const size_t bytes = static_cast<size_t>(data_stride) * height;
	if (!texture || !data || bytes == 0)
		return;

	// Stage the data first: if the allocation fails we bail out before opening an encoder
	MRCOwned<id<MTLBuffer>> buf = MRCTransfer([m_dev.dev newBufferWithLength:bytes options:MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined]);
	if (!buf)
		return;
	memcpy([buf contents], data, bytes);

	id<MTLCommandBuffer> cmdbuf = [m_queue commandBuffer];
	id<MTLBlitCommandEncoder> enc = [cmdbuf blitCommandEncoder];
	// Honor the requested destination offset instead of always writing at the texture origin
	[enc copyFromBuffer:buf
	       sourceOffset:0
	  sourceBytesPerRow:data_stride
	sourceBytesPerImage:bytes
	         sourceSize:MTLSizeMake(width, height, 1)
	          toTexture:texture
	   destinationSlice:0
	   destinationLevel:0
	  destinationOrigin:MTLOriginMake(x, y, 0)];
	[enc endEncoding];
	[cmdbuf commit];
}
/// HostDisplay override: unwraps the texture handle (an id<MTLTexture>) and forwards
/// to the id<MTLTexture> overload inside an autorelease pool.
void MetalHostDisplay::UpdateTexture(HostDisplayTexture* texture, u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_stride)
{ @autoreleasepool {
UpdateTexture((__bridge id<MTLTexture>)texture->GetHandle(), x, y, width, height, data, data_stride);
}}
static bool s_capture_next = false;
/// Starts the present pass for this frame.
/// Acquires the next drawable from the layer and opens a render encoder targeting it
/// on the GS device's render command buffer.
/// @param frame_skip When true nothing is presented this frame.
/// @return true if a present encoder was started (EndPresent must follow); false if the
///         frame is skipped, in which case the ImGui frame has already been ended.
bool MetalHostDisplay::BeginPresent(bool frame_skip)
{ @autoreleasepool {
GSDeviceMTL* dev = static_cast<GSDeviceMTL*>(g_gs_device.get());
// Arm the GPU trace capture once the configured frame number is reached
if (dev && m_capture_start_frame && dev->FrameNo() == m_capture_start_frame)
s_capture_next = true;
if (frame_skip || m_window_info.type == WindowInfo::Type::Surfaceless || !g_gs_device)
{
ImGui::EndFrame();
return false;
}
id<MTLCommandBuffer> buf = dev->GetRenderCmdBuf();
m_current_drawable = MRCRetain([m_layer nextDrawable]);
dev->EndRenderPass();
if (!m_current_drawable)
{
// No drawable available: leave an (empty) marker in the command buffer and flush what we have
[buf pushDebugGroup:@"Present Skipped"];
[buf popDebugGroup];
dev->FlushEncoders();
ImGui::EndFrame();
return false;
}
[m_pass_desc colorAttachments][0].texture = [m_current_drawable texture];
id<MTLRenderCommandEncoder> enc = [buf renderCommandEncoderWithDescriptor:m_pass_desc];
[enc setLabel:@"Present"];
// Hand the encoder to the GS device so its drawing code targets the drawable
dev->m_current_render.encoder = MRCRetain(enc);
return true;
}}
/// Finishes the present pass started by BeginPresent: renders ImGui, ends the pass,
/// schedules the drawable for presentation and flushes the command buffer.
/// Also drives the optional GPU trace capture enabled through m_capture_start_frame.
void MetalHostDisplay::EndPresent()
{ @autoreleasepool {
GSDeviceMTL* dev = static_cast<GSDeviceMTL*>(g_gs_device.get());
pxAssertDev(dev && dev->m_current_render.encoder && dev->m_current_render_cmdbuf, "BeginPresent cmdbuf was destroyed");
ImGui::Render();
dev->RenderImGui(ImGui::GetDrawData());
dev->EndRenderPass();
// The drawable is moved into the handler, which presents it from the command buffer's scheduled callback
if (m_current_drawable)
[dev->m_current_render_cmdbuf addScheduledHandler:[drawable = std::move(m_current_drawable)](id<MTLCommandBuffer>){
[drawable present];
}];
dev->FlushEncoders();
m_current_drawable = nullptr;
if (m_capture_start_frame)
{
if (@available(macOS 10.15, iOS 13, *))
{
static NSString* const path = @"/tmp/PCSX2MTLCapture.gputrace";
// Number of EndPresent calls left until a running capture is stopped (0 = not capturing)
static u32 frames;
if (frames)
{
--frames;
if (!frames)
{
[[MTLCaptureManager sharedCaptureManager] stopCapture];
Console.WriteLn("Metal Trace Capture to /tmp/PCSX2MTLCapture.gputrace finished");
// Reveal the finished trace file in the file viewer
[[NSWorkspace sharedWorkspace] selectFile:path
inFileViewerRootedAtPath:@"/tmp/"];
}
}
else if (s_capture_next)
{
// BeginPresent armed the capture for this frame; start it now
s_capture_next = false;
MTLCaptureManager* mgr = [MTLCaptureManager sharedCaptureManager];
if ([mgr supportsDestination:MTLCaptureDestinationGPUTraceDocument])
{
MTLCaptureDescriptor* desc = [[MTLCaptureDescriptor new] autorelease];
[desc setCaptureObject:m_dev.dev];
// Remove any trace left at the output path by an earlier capture
if ([[NSFileManager defaultManager] fileExistsAtPath:path])
[[NSFileManager defaultManager] removeItemAtPath:path error:nil];
[desc setOutputURL:[NSURL fileURLWithPath:path]];
[desc setDestination:MTLCaptureDestinationGPUTraceDocument];
NSError* err = nullptr;
[mgr startCaptureWithDescriptor:desc error:&err];
if (err)
{
Console.Error("Metal Trace Capture failed: %s", [[err localizedDescription] UTF8String]);
}
else
{
Console.WriteLn("Metal Trace Capture to /tmp/PCSX2MTLCapture.gputrace started");
// The capture is stopped on the second EndPresent call after this one
frames = 2;
}
}
else
{
Console.Error("Metal Trace Capture Failed: MTLCaptureManager doesn't support GPU trace documents! (Did you forget to run with METAL_CAPTURE_ENABLED=1?)");
}
}
}
}
}}
/// Enables display sync on the layer for every mode except VsyncMode::Off and remembers the mode.
void MetalHostDisplay::SetVSync(VsyncMode mode)
{
[m_layer setDisplaySyncEnabled:mode != VsyncMode::Off];
m_vsync_mode = mode;
}
/// Registers this renderer with ImGui by setting the backend name and capability flags.
/// Always returns true.
bool MetalHostDisplay::CreateImGuiContext()
{
ImGuiIO& io = ImGui::GetIO();
io.BackendRendererName = "pcsx2_imgui_metal";
io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes.
return true;
}
/// Clears the font atlas texture ID stored in ImGui.
void MetalHostDisplay::DestroyImGuiContext()
{
ImGui::GetIO().Fonts->SetTexID(nullptr);
}
/// (Re)creates the ImGui font atlas texture from ImGui's 8-bit alpha atlas data
/// and registers it as the atlas texture ID.
/// @return true if the texture was created; false (with the texture ID cleared) otherwise.
bool MetalHostDisplay::UpdateImGuiFontTexture()
{ @autoreleasepool {
	u8* data;
	int width, height;
	ImFontAtlas* fonts = ImGui::GetIO().Fonts;
	fonts->GetTexDataAsAlpha8(&data, &width, &height);
	MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatA8Unorm width:width height:height mipmapped:false];
	[desc setUsage:MTLTextureUsageShaderRead];
	[desc setStorageMode:MTLStorageModePrivate];
	// Where supported, swizzle so the color channels read as one and alpha comes from the atlas
	if (@available(macOS 10.15, *))
		if (m_dev.features.texture_swizzle)
			[desc setSwizzle:MTLTextureSwizzleChannelsMake(MTLTextureSwizzleOne, MTLTextureSwizzleOne, MTLTextureSwizzleOne, MTLTextureSwizzleAlpha)];
	m_font_tex = MRCTransfer([m_dev.dev newTextureWithDescriptor:desc]);
	if (!m_font_tex)
	{
		// Creation failed: don't encode an upload into a nil texture, just report the failure
		fonts->SetTexID(nullptr);
		return false;
	}
	[m_font_tex setLabel:@"ImGui Font"];
	// One byte per pixel, so the row stride equals the width
	UpdateTexture(m_font_tex, 0, 0, width, height, data, width);
	fonts->SetTexID((__bridge void*)m_font_tex);
	return true;
}}
bool MetalHostDisplay::GetHostRefreshRate(float* refresh_rate)
{
OnMainThread([this, refresh_rate]
{
u32 did = [[[[[m_view window] screen] deviceDescription] valueForKey:@"NSScreenNumber"] unsignedIntValue];
if (CGDisplayModeRef mode = CGDisplayCopyDisplayMode(did))
{
*refresh_rate = CGDisplayModeGetRefreshRate(mode);
CGDisplayModeRelease(mode);
}
else
{
*refresh_rate = 0;
}
});
return *refresh_rate != 0;
}
#endif // __APPLE__

View File

@ -18,7 +18,8 @@
// clang-format off
// MacOS headers define PAGE_SIZE to the size of an x86 page
#ifdef PAGE_SIZE
#ifdef __APPLE__
#include <mach/vm_page_size.h>
#undef PAGE_SIZE
#endif

View File

@ -797,4 +797,7 @@ struct GSAdapter
#endif
};
template <>
struct std::hash<GSHWDrawConfig::PSSelector> : public GSHWDrawConfig::PSSelectorHash {};
extern std::unique_ptr<GSDevice> g_gs_device;

View File

@ -0,0 +1,398 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GS/Renderers/Common/GSDevice.h"
#ifndef __OBJC__
#error "This header is for use with Objective-C++ only.
#endif
#ifdef __APPLE__
#include "common/HashCombine.h"
#include "common/MRCHelpers.h"
#include "GS/GS.h"
#include "GSMTLDeviceInfo.h"
#include "GSMTLSharedHeader.h"
#include <AppKit/AppKit.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include <atomic>
#include <memory>
#include <unordered_map>
/// Pipeline state that is not part of the VS/PS selectors: render target format,
/// color write mask, blend configuration and depth/stencil attachment presence.
/// The fields are bitfields overlaid on a 3-byte key.
struct PipelineSelectorExtrasMTL
{
	union
	{
		struct
		{
			GSTexture::Format rt : 4;
			u8 writemask : 4;
			GSDevice::BlendFactor src_factor : 4;
			GSDevice::BlendFactor dst_factor : 4;
			GSDevice::BlendOp blend_op : 2;
			bool blend_enable : 1;
			bool has_depth : 1;
			bool has_stencil : 1;
		};
		u8 _key[3];
	};
	/// Packs the three key bytes into the low 24 bits of a u32 (byte 0 lowest).
	/// Does not modify the selector, so it is callable on const instances.
	u32 fullkey() const { return _key[0] | (_key[1] << 8) | (_key[2] << 16); }

	/// Creates a selector with every key byte zeroed.
	PipelineSelectorExtrasMTL(): _key{} {}
	/// Builds the selector for a draw.
	/// @param blend Blend state to encode (factors, op, enable flag).
	/// @param rt Render target, or null; a null target is recorded as Format::Invalid.
	/// @param cms Per-channel color write selector, translated to an MTLColorWriteMask.
	/// @param has_depth,has_stencil Whether depth / stencil attachments are in use.
	PipelineSelectorExtrasMTL(GSHWDrawConfig::BlendState blend, GSTexture* rt, GSHWDrawConfig::ColorMaskSelector cms, bool has_depth, bool has_stencil)
		: _key{}
	{
		this->rt = rt ? rt->GetFormat() : GSTexture::Format::Invalid;
		MTLColorWriteMask mask = MTLColorWriteMaskNone;
		if (cms.wr) mask |= MTLColorWriteMaskRed;
		if (cms.wg) mask |= MTLColorWriteMaskGreen;
		if (cms.wb) mask |= MTLColorWriteMaskBlue;
		if (cms.wa) mask |= MTLColorWriteMaskAlpha;
		this->writemask = mask;
		this->src_factor = static_cast<GSDevice::BlendFactor>(blend.src_factor);
		this->dst_factor = static_cast<GSDevice::BlendFactor>(blend.dst_factor);
		this->blend_op = static_cast<GSDevice::BlendOp>(blend.op);
		this->blend_enable = blend.enable;
		this->has_depth = has_depth;
		this->has_stencil = has_stencil;
	}
};
/// Complete key identifying a hardware draw pipeline: pixel shader selector,
/// extra pipeline state and vertex shader selector.
/// Construction and copying operate on the raw bytes of the whole object, and
/// equality is delegated to BitEqual (defined elsewhere — presumably a bytewise
/// comparison; confirm), so every byte of the object has a defined value.
struct PipelineSelectorMTL
{
GSHWDrawConfig::PSSelector ps;
PipelineSelectorExtrasMTL extras;
GSHWDrawConfig::VSSelector vs;
// Zero the entire object rather than relying on the members' own constructors
PipelineSelectorMTL()
{
memset(this, 0, sizeof(*this));
}
// Zero everything first, then fill in the three selectors
PipelineSelectorMTL(GSHWDrawConfig::VSSelector vs, GSHWDrawConfig::PSSelector ps, PipelineSelectorExtrasMTL extras)
{
memset(this, 0, sizeof(*this));
this->vs = vs;
this->ps = ps;
this->extras = extras;
}
// Copies are done with memcpy so the copy is byte-for-byte identical to the source
PipelineSelectorMTL(const PipelineSelectorMTL& other)
{
memcpy(this, &other, sizeof(other));
}
PipelineSelectorMTL& operator=(const PipelineSelectorMTL& other)
{
memcpy(this, &other, sizeof(other));
return *this;
}
bool operator==(const PipelineSelectorMTL& other) const
{
return BitEqual(*this, other);
}
};
static_assert(sizeof(PipelineSelectorMTL) == 16);
template <>
struct std::hash<PipelineSelectorMTL>
{
size_t operator()(const PipelineSelectorMTL& sel) const
{
size_t h = 0;
size_t pieces[(sizeof(PipelineSelectorMTL) + sizeof(size_t) - 1) / sizeof(size_t)] = {};
memcpy(pieces, &sel, sizeof(PipelineSelectorMTL));
for (auto& piece : pieces)
HashCombine(h, piece);
return h;
}
};
/// Scope guard for a Metal debug group: pushes the group on construction and pops
/// it on destruction. The command buffer is stored as a plain pointer (no retain
/// is sent here), so it must stay alive for as long as the guard exists.
class GSScopedDebugGroupMTL
{
	id<MTLCommandBuffer> m_buffer;
public:
	/// Pushes a debug group called `name` onto `buffer`.
	GSScopedDebugGroupMTL(id<MTLCommandBuffer> buffer, NSString* name): m_buffer(buffer)
	{
		[m_buffer pushDebugGroup:name];
	}
	// Copying would pop the same group more than once, so the guard is non-copyable
	GSScopedDebugGroupMTL(const GSScopedDebugGroupMTL&) = delete;
	GSScopedDebugGroupMTL& operator=(const GSScopedDebugGroupMTL&) = delete;
	/// Pops the debug group pushed by the constructor.
	~GSScopedDebugGroupMTL()
	{
		[m_buffer popDebugGroup];
	}
};
struct ImDrawData;
class GSTextureMTL;
class GSDeviceMTL final : public GSDevice
{
public:
using DepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
using SamplerSelector = GSHWDrawConfig::SamplerSelector;
enum class LoadAction
{
DontCare,
DontCareIfFull,
Load,
};
class UsageTracker
{
struct UsageEntry
{
u64 drawno;
size_t pos;
};
std::vector<UsageEntry> m_usage;
size_t m_size = 0;
size_t m_pos = 0;
public:
size_t Size() { return m_size; }
size_t Pos() { return m_pos; }
bool PrepareForAllocation(u64 last_draw, size_t amt);
size_t Allocate(u64 current_draw, size_t amt);
void Reset(size_t new_size);
};
struct Map
{
id<MTLBuffer> gpu_buffer;
size_t gpu_offset;
void* cpu_buffer;
};
struct UploadBuffer
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> mtlbuffer;
void* buffer = nullptr;
};
struct BufferPair
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> cpubuffer;
MRCOwned<id<MTLBuffer>> gpubuffer;
void* buffer = nullptr;
size_t last_upload = 0;
};
struct ConvertShaderVertex
{
simd_float2 pos;
simd_float2 texpos;
};
struct VSSelector
{
union
{
struct
{
bool iip : 1;
bool fst : 1;
bool point_size : 1;
};
u8 key;
};
VSSelector(): key(0) {}
VSSelector(u8 key): key(key) {}
};
using PSSelector = GSHWDrawConfig::PSSelector;
// MARK: Configuration
int m_mipmap;
// MARK: Permanent resources
std::shared_ptr<std::pair<std::mutex, GSDeviceMTL*>> m_backref;
GSMTLDevice m_dev;
MRCOwned<id<MTLCommandQueue>> m_queue;
MRCOwned<id<MTLFence>> m_draw_sync_fence;
MRCOwned<MTLFunctionConstantValues*> m_fn_constants;
MRCOwned<MTLVertexDescriptor*> m_hw_vertex;
std::unique_ptr<GSTextureMTL> m_font;
// Draw IDs are used to make sure we're not clobbering things
u64 m_current_draw = 1;
std::atomic<u64> m_last_finished_draw{0};
// Functions and Pipeline States
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline[static_cast<int>(ShaderConvert::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_present_pipeline[static_cast<int>(ShaderConvert::Count)];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy[2];
MRCOwned<id<MTLRenderPipelineState>> m_convert_pipeline_copy_mask[1 << 4];
MRCOwned<id<MTLRenderPipelineState>> m_merge_pipeline[4];
MRCOwned<id<MTLRenderPipelineState>> m_interlace_pipeline[4];
MRCOwned<id<MTLRenderPipelineState>> m_datm_pipeline[2];
MRCOwned<id<MTLRenderPipelineState>> m_stencil_clear_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_primid_init_pipeline[2][2];
MRCOwned<id<MTLRenderPipelineState>> m_hdr_init_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_hdr_resolve_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_fxaa_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_shadeboost_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_imgui_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_imgui_pipeline_a8;
MRCOwned<id<MTLFunction>> m_hw_vs[1 << 3];
std::unordered_map<PSSelector, MRCOwned<id<MTLFunction>>> m_hw_ps;
std::unordered_map<PipelineSelectorMTL, MRCOwned<id<MTLRenderPipelineState>>> m_hw_pipeline;
MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[8];
MRCOwned<id<MTLSamplerState>> m_sampler_hw[1 << 8];
MRCOwned<id<MTLDepthStencilState>> m_dss_stencil_zero;
MRCOwned<id<MTLDepthStencilState>> m_dss_stencil_write;
MRCOwned<id<MTLDepthStencilState>> m_dss_hw[1 << 5];
MRCOwned<id<MTLBuffer>> m_texture_download_buf;
UploadBuffer m_texture_upload_buf;
BufferPair m_vertex_upload_buf;
// MARK: Ephemeral resources
MRCOwned<id<MTLCommandBuffer>> m_current_render_cmdbuf;
struct MainRenderEncoder
{
MRCOwned<id<MTLRenderCommandEncoder>> encoder;
GSTexture* color_target = nullptr;
GSTexture* depth_target = nullptr;
GSTexture* stencil_target = nullptr;
GSTexture* tex[8] = {};
void* vertex_buffer = nullptr;
void* name = nullptr;
struct Has
{
bool cb_vs : 1;
bool cb_ps : 1;
bool scissor : 1;
bool blend_color : 1;
bool pipeline_sel : 1;
bool sampler : 1;
} has;
DepthStencilSelector depth_sel = DepthStencilSelector::NoDepth();
// Clear line (Things below here are tracked by `has` and don't need to be cleared to reset)
SamplerSelector sampler_sel;
u8 blend_color;
GSVector4i scissor;
PipelineSelectorMTL pipeline_sel;
GSHWDrawConfig::VSConstantBuffer cb_vs;
GSHWDrawConfig::PSConstantBuffer cb_ps;
MainRenderEncoder(const MainRenderEncoder&) = delete;
MainRenderEncoder() = default;
} m_current_render;
MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_texture_upload_encoder;
MRCOwned<id<MTLBlitCommandEncoder>> m_late_texture_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_vertex_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_vertex_upload_encoder;
struct DebugEntry
{
enum Op { Push, Insert, Pop } op;
MRCOwned<NSString*> str;
DebugEntry(Op op, MRCOwned<NSString*> str): op(op), str(std::move(str)) {}
};
std::vector<DebugEntry> m_debug_entries;
u32 m_debug_group_level = 0;
GSDeviceMTL();
~GSDeviceMTL() override;
/// Allocate space in the given buffer
Map Allocate(UploadBuffer& buffer, size_t amt);
/// Allocate space in the given buffer for use with the given render command encoder
Map Allocate(BufferPair& buffer, size_t amt);
/// Enqueue upload of any outstanding data
void Sync(BufferPair& buffer);
/// Get the texture upload encoder, creating a new one if it doesn't exist
id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
/// Get the late texture upload encoder, creating a new one if it doesn't exist
id<MTLBlitCommandEncoder> GetLateTextureUploadEncoder();
/// Get the vertex upload encoder, creating a new one if it doesn't exist
id<MTLBlitCommandEncoder> GetVertexUploadEncoder();
/// Get the render command buffer, creating a new one if it doesn't exist
id<MTLCommandBuffer> GetRenderCmdBuf();
/// Flush pending operations from all encoders to the GPU
void FlushEncoders();
/// End current render pass without flushing
void EndRenderPass();
/// Begin a new render pass (may reuse existing)
void BeginRenderPass(NSString* name, GSTexture* color, MTLLoadAction color_load, GSTexture* depth, MTLLoadAction depth_load, GSTexture* stencil = nullptr, MTLLoadAction stencil_load = MTLLoadActionDontCare);
GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) override;
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) override;
void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) override;
void DoFXAA(GSTexture* sTex, GSTexture* dTex) override;
void DoShadeBoost(GSTexture* sTex, GSTexture* dTex, const float params[4]) override;
void DoExternalFX(GSTexture* sTex, GSTexture* dTex) override;
MRCOwned<id<MTLFunction>> LoadShader(NSString* name);
MRCOwned<id<MTLRenderPipelineState>> MakePipeline(MTLRenderPipelineDescriptor* desc, id<MTLFunction> vertex, id<MTLFunction> fragment, NSString* name);
bool Create(HostDisplay* display) override;
void ClearRenderTarget(GSTexture* t, const GSVector4& c) override;
void ClearRenderTarget(GSTexture* t, u32 c) override;
void ClearDepth(GSTexture* t) override;
void ClearStencil(GSTexture* t, u8 c) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override;
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) override;
void DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, id<MTLRenderPipelineState> pipeline, bool linear, LoadAction load_action, void* frag_uniform, size_t frag_uniform_len);
void DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds);
/// Copy from a position in sTex to the same position in the currently active render encoder using the given fs pipeline and rect
void RenderCopy(GSTexture* sTex, id<MTLRenderPipelineState> pipeline, const GSVector4i& rect);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void FlushClears(GSTexture* tex);
// MARK: Main Render Encoder operations
void MRESetHWPipelineState(GSHWDrawConfig::VSSelector vs, GSHWDrawConfig::PSSelector ps, GSHWDrawConfig::BlendState blend, GSHWDrawConfig::ColorMaskSelector cms);
void MRESetDSS(DepthStencilSelector sel);
void MRESetDSS(id<MTLDepthStencilState> dss);
void MRESetSampler(SamplerSelector sel);
void MRESetTexture(GSTexture* tex, int pos);
void MRESetVertices(id<MTLBuffer> buffer, size_t offset);
void MRESetScissor(const GSVector4i& scissor);
void MREClearScissor();
void MRESetCB(const GSHWDrawConfig::VSConstantBuffer& cb_vs);
void MRESetCB(const GSHWDrawConfig::PSConstantBuffer& cb_ps);
void MRESetBlendColor(u8 blend_color);
void MRESetPipeline(id<MTLRenderPipelineState> pipe);
void MREInitHWDraw(GSHWDrawConfig& config, const Map& verts);
// MARK: Render HW
void SetupDestinationAlpha(GSTexture* rt, GSTexture* ds, const GSVector4i& r, bool datm);
void RenderHW(GSHWDrawConfig& config) override;
void SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder> enc, id<MTLBuffer> buffer, size_t off);
// MARK: Debug
void PushDebugGroup(const char* fmt, ...) override;
void PopDebugGroup() override;
void InsertDebugMessage(DebugMessageCategory category, const char* fmt, ...) override;
void ProcessDebugEntry(id<MTLCommandEncoder> enc, const DebugEntry& entry);
void FlushDebugEntries(id<MTLCommandEncoder> enc);
void EndDebugGroup(id<MTLCommandEncoder> enc);
// MARK: ImGui
void RenderImGui(ImDrawData* data);
u32 FrameNo() const { return m_frame; }
};
#endif // __APPLE__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,66 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#ifndef __OBJC__
#error "This header is for use with Objective-C++ only.
#endif
#ifdef __APPLE__
#include "PCSX2Base.h"
#include "common/MRCHelpers.h"
#include <Metal/Metal.h>
/// A Metal device together with its shader library and the feature flags detected for it.
struct GSMTLDevice
{
	enum class MetalVersion : u8
	{
		Metal20, ///< Metal 2.0 (macOS 10.13, iOS 11)
		Metal21, ///< Metal 2.1 (macOS 10.14, iOS 12)
		Metal22, ///< Metal 2.2 (macOS 10.15, iOS 13)
		Metal23, ///< Metal 2.3 (macOS 11, iOS 14)
	};

	/// Feature flags and limits filled in by the device constructor
	struct Features
	{
		bool unified_memory;
		bool texture_swizzle;
		bool framebuffer_fetch;
		bool primid;
		bool slow_color_compression; ///< Color compression seems to slow down rt read on AMD
		MetalVersion shader_version;
		int max_texsize;
	};

	MRCOwned<id<MTLDevice>> dev;
	MRCOwned<id<MTLLibrary>> shaders;
	// Zero-initialized so a default-constructed device never exposes indeterminate feature values
	Features features = {};

	GSMTLDevice() = default;
	explicit GSMTLDevice(MRCOwned<id<MTLDevice>> dev);

	/// True when both a device and a shader library are present
	bool IsOk() const { return dev && shaders; }
	/// Drops the device and the shader library
	void Reset()
	{
		dev = nullptr;
		shaders = nullptr;
	}
};
const char* to_string(GSMTLDevice::MetalVersion ver);
#endif // __APPLE__

View File

@ -0,0 +1,214 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2022 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSMTLDeviceInfo.h"
#include "GS/GS.h"
#include "common/Console.h"
#ifdef __APPLE__
/// Loads `<name>.metallib` from the main bundle.
/// Returns null if the bundle has no such resource; the caller owns a non-null result (it comes from a `new…` method).
static id<MTLLibrary> loadMainLibrary(id<MTLDevice> dev, NSString* name)
{
	NSString* metallib_path = [[NSBundle mainBundle] pathForResource:name ofType:@"metallib"];
	if (!metallib_path)
		return nullptr;
	return [dev newLibraryWithFile:metallib_path error:nullptr];
}
/// Loads the newest bundled shader library that the running OS allows.
/// Tries the Metal23, Metal22 and Metal21 metallibs in that order, each behind an OS availability check,
/// and falls back to the device's default library when none of them could be loaded.
static MRCOwned<id<MTLLibrary>> loadMainLibrary(id<MTLDevice> dev)
{
if (@available(macOS 11.0, iOS 14.0, *))
if (id<MTLLibrary> lib = loadMainLibrary(dev, @"Metal23"))
return MRCTransfer(lib);
if (@available(macOS 10.15, iOS 13.0, *))
if (id<MTLLibrary> lib = loadMainLibrary(dev, @"Metal22"))
return MRCTransfer(lib);
if (@available(macOS 10.14, iOS 12.0, *))
if (id<MTLLibrary> lib = loadMainLibrary(dev, @"Metal21"))
return MRCTransfer(lib);
// No versioned metallib was usable
return MRCTransfer([dev newDefaultLibrary]);
}
/// Works out which Metal version a shader library was built with by probing for marker functions.
/// Markers are checked newest first; Metal 2.0 is reported when none is found.
static GSMTLDevice::MetalVersion detectLibraryVersion(id<MTLLibrary> lib)
{
	using Version = GSMTLDevice::MetalVersion;
	struct Marker
	{
		NSString* function_name;
		Version version;
	};
	// These functions are defined in tfx.metal to indicate the metal version used to make the metallib
	const Marker markers[] = {
		{@"metal_version_23", Version::Metal23},
		{@"metal_version_22", Version::Metal22},
		{@"metal_version_21", Version::Metal21},
	};
	for (const Marker& marker : markers)
	{
		if (MRCTransfer([lib newFunctionWithName:marker.function_name]))
			return marker.version;
	}
	return Version::Metal20;
}
/// Checks whether the device can build a pipeline whose fragment shader is `primid_test`.
/// Returns true when pipeline creation reported no error.
static bool detectPrimIDSupport(id<MTLDevice> dev, id<MTLLibrary> lib)
{
	// Nvidia Metal driver is missing primid support, yay
	MRCOwned<MTLRenderPipelineDescriptor*> desc = MRCTransfer([MTLRenderPipelineDescriptor new]);
	[desc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])];
	[desc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"primid_test"])];
	[[desc colorAttachments][0] setPixelFormat:MTLPixelFormatR8Uint];
	// Must start out nil: the out-parameter is not guaranteed to be written when creation succeeds,
	// and an uninitialized pointer would then be read by the check below
	NSError* err = nil;
	// Only the error matters here, the pipeline itself is discarded immediately
	[[dev newRenderPipelineStateWithDescriptor:desc error:&err] release];
	return !err;
}
namespace
{
/// Outcome of the Intel GPU probe (file-local)
enum class DetectionResult
{
HaswellOrNotIntel, ///< Everything works fine
Broadwell, ///< PrimID broken
Skylake, ///< PrimID broken, FBFetch supported
};
}
/// Classifies Intel GPUs by running the `fbfetch_test` fragment shader on a 1x1 texture and reading the pixel back.
/// Returns HaswellOrNotIntel without running the test when the device name does not contain "Intel"
/// or when the test pipeline cannot be created.
static DetectionResult detectIntelGPU(id<MTLDevice> dev, id<MTLLibrary> lib)
{
// Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch!
// Annoyingly, the Haswell compiler successfully makes a pipeline but actually miscompiles it and doesn't insert any fbfetch instructions
// The Broadwell compiler inserts the Skylake fbfetch instruction, but Broadwell doesn't support that. It seems to make the shader not do anything
// So we actually have to test the thing
// In addition, Broadwell+ has broken primid so we need to disable that.
// Conveniently we can use the same test to detect both (except on macOS < 11. All Broadwell machines support 11, so the answer to that is "upgrade")
// See https://github.com/tellowkrinkle/MetalBugReproduction/releases/tag/BrokenPrimID for details
// AMD compiler crashes and gets retried 3 times over multiple seconds trying to compile the pipeline
// We know this is only a possibility on Intel anyways
if (![[dev name] containsString:@"Intel"])
return DetectionResult::HaswellOrNotIntel;
auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[pdesc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])];
[pdesc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"fbfetch_test"])];
[[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm];
auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]);
if (!pipe)
return DetectionResult::HaswellOrNotIntel;
// One shared 4-byte buffer is used both to upload the seed pixel and to read the result back
auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]);
auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm width:1 height:1 mipmapped:false];
[tdesc setUsage:MTLTextureUsageRenderTarget];
auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]);
auto q = MRCTransfer([dev newCommandQueue]);
u32 px = 0x11223344;
memcpy([buf contents], &px, 4);
id<MTLCommandBuffer> cmdbuf = [q commandBuffer];
// Step 1: copy the seed pixel into the texture
id<MTLBlitCommandEncoder> upload = [cmdbuf blitCommandEncoder];
[upload copyFromBuffer:buf sourceOffset:0 sourceBytesPerRow:4 sourceBytesPerImage:4 sourceSize:MTLSizeMake(1, 1, 1) toTexture:tex destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)];
[upload endEncoding];
// Step 2: draw one triangle with the test pipeline, loading the existing contents so the shader can read them
auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
[[rpdesc colorAttachments][0] setTexture:tex];
[[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad];
[[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore];
id<MTLRenderCommandEncoder> renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc];
[renc setRenderPipelineState:pipe];
[renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
[renc endEncoding];
// Step 3: copy the resulting pixel back into the buffer
id<MTLBlitCommandEncoder> download = [cmdbuf blitCommandEncoder];
[download copyFromTexture:tex sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(1, 1, 1) toBuffer:buf destinationOffset:0 destinationBytesPerRow:4 destinationBytesPerImage:4];
[download endEncoding];
// Block until the GPU is done so the buffer contents can be read on the CPU
[cmdbuf commit];
[cmdbuf waitUntilCompleted];
u32 outpx;
memcpy(&outpx, [buf contents], 4);
// Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing
if (outpx == 0x22446688)
return DetectionResult::Skylake;
else if (outpx == 0x11223344)
return DetectionResult::Broadwell;
else
return DetectionResult::HaswellOrNotIntel;
}
/// Wraps a Metal device: loads the shader library and probes the features the renderer cares about.
/// With a null `dev`, no device or shader library is stored, so IsOk() stays false.
GSMTLDevice::GSMTLDevice(MRCOwned<id<MTLDevice>> dev)
{
	// Zero the feature set before anything else so it holds defined values even on the early return below
	memset(&features, 0, sizeof(features));

	if (!dev)
		return;

	shaders = loadMainLibrary(dev);

	// Unified memory: the MTL_UNIFIED_MEMORY environment variable overrides what the device reports
	if (char* env = getenv("MTL_UNIFIED_MEMORY"))
		features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y';
	else if (@available(macOS 10.15, iOS 13.0, *))
		features.unified_memory = [dev hasUnifiedMemory];
	else
		features.unified_memory = false;

	if (@available(macOS 10.15, iOS 13.0, *))
		if ([dev supportsFamily:MTLGPUFamilyMac2] || [dev supportsFamily:MTLGPUFamilyApple1])
			features.texture_swizzle = true;

	if (@available(macOS 11.0, iOS 13.0, *))
		if ([dev supportsFamily:MTLGPUFamilyApple1])
			features.framebuffer_fetch = true;

	features.shader_version = detectLibraryVersion(shaders);
	if (features.framebuffer_fetch && features.shader_version < MetalVersion::Metal23)
	{
		Console.Warning("Metal: GPU supports framebuffer fetch but shader lib does not! Get an updated shader lib for better performance!");
		features.framebuffer_fetch = false;
	}

	// PrimID needs a Metal 2.2+ shader library and a driver that can actually build the test pipeline
	features.primid = features.shader_version >= MetalVersion::Metal22;
	if (features.primid && !detectPrimIDSupport(dev, shaders))
		features.primid = false;

	// The Intel probe is only run when framebuffer fetch was not already enabled above
	if (!features.framebuffer_fetch && features.shader_version >= MetalVersion::Metal23)
	{
		switch (detectIntelGPU(dev, shaders))
		{
			case DetectionResult::HaswellOrNotIntel:
				break;
			case DetectionResult::Broadwell:
				features.primid = false; // Broken
				break;
			case DetectionResult::Skylake:
				features.primid = false; // Broken
				features.framebuffer_fetch = true;
				break;
		}
	}

	if (features.framebuffer_fetch && GSConfig.DisableFramebufferFetch)
	{
		Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
		features.framebuffer_fetch = false;
	}

	// Same override scheme as unified memory, defaulting to "device name contains AMD"
	if (char* env = getenv("MTL_SLOW_COLOR_COMPRESSION"))
		features.slow_color_compression = env[0] == '1' || env[0] == 'y' || env[0] == 'Y';
	else
		features.slow_color_compression = [[dev name] containsString:@"AMD"];

	features.max_texsize = 8192;
	if ([dev supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1])
		features.max_texsize = 16384;
	if (@available(macOS 10.15, iOS 13.0, *))
		if ([dev supportsFamily:MTLGPUFamilyApple3])
			features.max_texsize = 16384;

	// Take ownership last; everything above used the parameter directly
	this->dev = std::move(dev);
}
/// Returns a human-readable name for a Metal version, or "Unknown" for a value outside the enum.
const char* to_string(GSMTLDevice::MetalVersion ver)
{
	// No default label on purpose, so the compiler can still warn about an unhandled enumerator
	switch (ver)
	{
		case GSMTLDevice::MetalVersion::Metal20: return "Metal 2.0";
		case GSMTLDevice::MetalVersion::Metal21: return "Metal 2.1";
		case GSMTLDevice::MetalVersion::Metal22: return "Metal 2.2";
		case GSMTLDevice::MetalVersion::Metal23: return "Metal 2.3";
	}
	// Only reached for an out-of-range value; prevents flowing off the end of a non-void function
	return "Unknown";
}
#endif // __APPLE__

View File

@ -0,0 +1,60 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <metal_stdlib>
#include "GSMTLSharedHeader.h"
using namespace metal;
constant uchar2 SCALING_FACTOR [[function_constant(GSMTLConstantIndex_SCALING_FACTOR)]];
/// Per-vertex data for the convert shaders
struct ConvertShaderData
{
float4 p [[position]]; ///< Output position
float2 t; ///< NOTE(review): presumably the texture coordinate — confirm against the shaders using it
};
/// Resources of a convert fragment shader that reads a color texture
struct ConvertPSRes
{
texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]];
sampler s [[sampler(0)]];
/// Samples the texture at `coord` with the bound sampler
float4 sample(float2 coord)
{
return texture.sample(s, coord);
}
};
/// Resources of a convert fragment shader that reads a depth texture
struct ConvertPSDepthRes
{
depth2d<float> texture [[texture(GSMTLTextureIndexNonHW)]];
sampler s [[sampler(0)]];
/// Samples the depth texture at `coord` with the bound sampler
float sample(float2 coord)
{
return texture.sample(s, coord);
}
};
/// Scales `value` by 2^32, converts it to a uint and returns that integer's four bytes as a float4.
static inline float4 convert_depth32_rgba8(float value)
{
	const uint bits = uint(value * 0x1p32);
	const uchar4 bytes = as_type<uchar4>(bits);
	return float4(bytes);
}
/// Scales `value` by 2^32, converts it to a uint, then builds four channels from shifted copies of that integer.
/// The first three channels are masked with 0xf8 and the last with 0x80.
static inline float4 convert_depth16_rgba8(float value)
{
	const uint bits = uint(value * 0x1p32);
	const uint4 shifted = uint4(bits << 3, bits >> 2, bits >> 7, bits >> 8);
	const uint4 mask = uint4(0xf8, 0xf8, 0xf8, 0x80);
	return float4(shifted & mask);
}

View File

@ -0,0 +1,151 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <simd/simd.h>
/// Buffer index constants; values are implicit (0, 1, 2, ...), so the order of enumerators matters.
/// NOTE(review): presumably the buffer binding slots shared by the host code and the shaders — confirm at the use sites.
enum GSMTLBufferIndices
{
GSMTLBufferIndexVertices,
GSMTLBufferIndexUniforms,
GSMTLBufferIndexHWVertices,
GSMTLBufferIndexHWUniforms,
};
/// Texture binding indices, used in shader `[[texture(...)]]` attributes.
/// Values are implicit (0, 1, 2, ...), so the order of enumerators matters.
enum GSMTLTextureIndex
{
GSMTLTextureIndexNonHW,
GSMTLTextureIndexTex,
GSMTLTextureIndexPalette,
GSMTLTextureIndexRenderTarget,
GSMTLTextureIndexPrimIDs,
};
/// Uniform block holding two integer parameters.
/// NOTE(review): the name suggests it is consumed by the convert fragment shaders — confirm.
struct GSMTLConvertPSUniform
{
int emoda;
int emodc;
};
/// Uniform block holding a float2 and a float.
/// NOTE(review): the name suggests it is consumed by the interlace fragment shaders — confirm.
struct GSMTLInterlacePSUniform
{
vector_float2 ZrH;
float hH;
};
/// Uniform block with vertex/texture scale and offset pairs, a point size and a maximum depth.
/// NOTE(review): the name suggests it is consumed by the main vertex shader — confirm.
struct GSMTLMainVSUniform
{
vector_float2 vertex_scale;
vector_float2 vertex_offset;
vector_float2 texture_scale;
vector_float2 texture_offset;
vector_float2 point_size;
uint max_depth;
};
/// Uniform block for the main pixel shader path.
/// NOTE(review): field order and sizes presumably have to match the host-side constant buffer — confirm before reordering.
struct GSMTLMainPSUniform
{
// Three views of the same storage: all four floats, the first three as fog color,
// or three padding floats followed by `aref`
union
{
vector_float4 fog_color_aref;
vector_float3 fog_color;
struct
{
float pad0[3];
float aref;
};
};
vector_float4 wh; ///< xy => PS2, zw => actual (upscaled)
vector_float2 ta;
float max_depth;
float alpha_fix;
vector_uint4 uv_msk_fix;
vector_uint4 fbmask;
vector_float4 half_texel;
vector_float4 uv_min_max;
struct
{
unsigned int blue_mask;
unsigned int blue_shift;
unsigned int green_mask;
unsigned int green_shift;
} channel_shuffle;
vector_float2 tc_offset;
vector_float2 st_scale;
matrix_float4x4 dither_matrix;
};
/// Vertex attribute index constants; values are implicit (0, 1, 2, ...), so the order of enumerators matters.
enum GSMTLAttributes
{
GSMTLAttributeIndexST,
GSMTLAttributeIndexC,
GSMTLAttributeIndexQ,
GSMTLAttributeIndexXY,
GSMTLAttributeIndexZ,
GSMTLAttributeIndexUV,
GSMTLAttributeIndexF,
};
/// Function constant indices, used in shader `[[function_constant(...)]]` attributes.
/// Values are implicit (0, 1, 2, ...), so inserting or reordering enumerators renumbers every later constant.
enum GSMTLFnConstants
{
GSMTLConstantIndex_SCALING_FACTOR,
GSMTLConstantIndex_FRAMEBUFFER_FETCH,
GSMTLConstantIndex_FST,
GSMTLConstantIndex_IIP,
GSMTLConstantIndex_VS_POINT_SIZE,
GSMTLConstantIndex_PS_AEM_FMT,
GSMTLConstantIndex_PS_PAL_FMT,
GSMTLConstantIndex_PS_DFMT,
GSMTLConstantIndex_PS_DEPTH_FMT,
GSMTLConstantIndex_PS_AEM,
GSMTLConstantIndex_PS_FBA,
GSMTLConstantIndex_PS_FOG,
GSMTLConstantIndex_PS_DATE,
GSMTLConstantIndex_PS_ATST,
GSMTLConstantIndex_PS_TFX,
GSMTLConstantIndex_PS_TCC,
GSMTLConstantIndex_PS_WMS,
GSMTLConstantIndex_PS_WMT,
GSMTLConstantIndex_PS_LTF,
GSMTLConstantIndex_PS_SHUFFLE,
GSMTLConstantIndex_PS_READ_BA,
GSMTLConstantIndex_PS_WRITE_RG,
GSMTLConstantIndex_PS_FBMASK,
GSMTLConstantIndex_PS_BLEND_A,
GSMTLConstantIndex_PS_BLEND_B,
GSMTLConstantIndex_PS_BLEND_C,
GSMTLConstantIndex_PS_BLEND_D,
GSMTLConstantIndex_PS_CLR_HW,
GSMTLConstantIndex_PS_HDR,
GSMTLConstantIndex_PS_COLCLIP,
GSMTLConstantIndex_PS_BLEND_MIX,
GSMTLConstantIndex_PS_PABE,
GSMTLConstantIndex_PS_NO_COLOR,
GSMTLConstantIndex_PS_NO_COLOR1,
GSMTLConstantIndex_PS_ONLY_ALPHA,
GSMTLConstantIndex_PS_CHANNEL,
GSMTLConstantIndex_PS_DITHER,
GSMTLConstantIndex_PS_ZCLAMP,
GSMTLConstantIndex_PS_TCOFFSETHACK,
GSMTLConstantIndex_PS_URBAN_CHAOS_HLE,
GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE,
GSMTLConstantIndex_PS_TEX_IS_FB,
GSMTLConstantIndex_PS_AUTOMATIC_LOD,
GSMTLConstantIndex_PS_MANUAL_LOD,
GSMTLConstantIndex_PS_POINT_SAMPLER,
GSMTLConstantIndex_PS_INVALID_TEX0,
GSMTLConstantIndex_PS_SCANMSK,
};

View File

@ -0,0 +1,28 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Header with all metal stuff available for use with C++ (rather than Objective-C++)
#ifdef __APPLE__
#include "HostDisplay.h"
class GSDevice;
GSDevice* MakeGSDeviceMTL();
HostDisplay* MakeMetalHostDisplay();
HostDisplay::AdapterAndModeList GetMetalAdapterAndModeList();
#endif

View File

@ -0,0 +1,82 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GS/Renderers/Common/GSTexture.h"
#ifndef __OBJC__
#error "This header is for use with Objective-C++ only.
#endif
#ifdef __APPLE__
#include "common/MRCHelpers.h"
#include <Metal/Metal.h>
class GSDeviceMTL;
/// GSTexture backed by an MTLTexture, with deferred clear requests.
class GSTextureMTL : public GSTexture
{
	GSDeviceMTL* m_dev;
	MRCOwned<id<MTLTexture>> m_texture;
	bool m_has_mipmaps = false;

	// In Metal clears happen as a part of render passes instead of as separate steps, but the GSDevice API has it as a separate step
	// To deal with that, store the fact that a clear was requested here and it'll be applied on the next render pass
	bool m_needs_color_clear = false;
	bool m_needs_depth_clear = false;
	bool m_needs_stencil_clear = false;
	GSVector4 m_clear_color;
	// Given defined starting values because Swap() exchanges these even when no clear was ever requested
	float m_clear_depth = 0.0f;
	int m_clear_stencil = 0;

public:
	u64 m_last_read = 0; ///< Last time this texture was read by a draw
	u64 m_last_write = 0; ///< Last time this texture was written by a draw

	GSTextureMTL(GSDeviceMTL* dev, MRCOwned<id<MTLTexture>> texture, Type type, Format format);
	~GSTextureMTL();

	/// For making fake backbuffers
	void SetSize(GSVector2i size) { m_size = size; }
	/// Requests the texture be cleared the next time a color render is done
	void RequestColorClear(GSVector4 color);
	/// Requests the texture be cleared the next time a depth render is done
	void RequestDepthClear(float depth);
	/// Requests the texture be cleared the next time a stencil render is done
	void RequestStencilClear(int stencil);
	/// Reads whether a color clear was requested, then clears the request
	bool GetResetNeedsColorClear(GSVector4& colorOut);
	/// Reads whether a depth clear was requested, then clears the request
	bool GetResetNeedsDepthClear(float& depthOut);
	/// Reads whether a stencil clear was requested, then clears the request
	bool GetResetNeedsStencilClear(int& stencilOut);
	/// Flushes requested clears to the texture
	void FlushClears();
	/// Marks pending clears as done (e.g. if the whole texture is about to be overwritten)
	void InvalidateClears();

	void* GetNativeHandle() const override;
	bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) override;
	bool Map(GSMap& m, const GSVector4i* r = NULL, int layer = 0) override;
	/// Schedules an upload of `r` with the given row pitch and returns the CPU pointer to write the data into
	void* MapWithPitch(const GSVector4i& r, int pitch, int layer);
	void Unmap() override;
	void GenerateMipmap() override;
	bool Save(const std::string& fn) override;
	void Swap(GSTexture* tex) override;
	/// The underlying Metal texture
	id<MTLTexture> GetTexture() { return m_texture; }
};
#endif

View File

@ -0,0 +1,216 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "GSTextureMTL.h"
#include "GSDeviceMTL.h"
#include "GS/GSPerfMon.h"
#ifdef __APPLE__
/// Takes ownership of `texture` and caches its type, format, size and mip count in the inherited fields.
GSTextureMTL::GSTextureMTL(GSDeviceMTL* dev, MRCOwned<id<MTLTexture>> texture, Type type, Format format)
	: m_dev(dev)
	, m_texture(std::move(texture))
{
	id<MTLTexture> tex = m_texture;
	m_mipmap_levels = [tex mipmapLevelCount];
	m_size.y = [tex height];
	m_size.x = [tex width];
	m_format = format;
	m_type = type;
}
// Nothing to release by hand, the members clean up after themselves
GSTextureMTL::~GSTextureMTL() = default;
/// Records `color` as the pending clear color and flags a color clear as outstanding.
void GSTextureMTL::RequestColorClear(GSVector4 color)
{
	m_clear_color = color;
	m_needs_color_clear = true;
}
/// Records `depth` as the pending clear depth and flags a depth clear as outstanding.
void GSTextureMTL::RequestDepthClear(float depth)
{
	m_clear_depth = depth;
	m_needs_depth_clear = true;
}
/// Records `stencil` as the pending clear value and flags a stencil clear as outstanding.
void GSTextureMTL::RequestStencilClear(int stencil)
{
	m_clear_stencil = stencil;
	m_needs_stencil_clear = true;
}
/// Consumes a pending color clear.
/// @param colorOut Receives the requested clear color; left untouched when nothing is pending
/// @return true if a clear was pending (the request is cleared by this call)
bool GSTextureMTL::GetResetNeedsColorClear(GSVector4& colorOut)
{
	if (!m_needs_color_clear)
		return false;
	colorOut = m_clear_color;
	m_needs_color_clear = false;
	return true;
}
/// Consumes a pending depth clear.
/// @param depthOut Receives the requested clear depth; left untouched when nothing is pending
/// @return true if a clear was pending (the request is cleared by this call)
bool GSTextureMTL::GetResetNeedsDepthClear(float& depthOut)
{
	if (!m_needs_depth_clear)
		return false;
	depthOut = m_clear_depth;
	m_needs_depth_clear = false;
	return true;
}
/// Consumes a pending stencil clear.
/// @param stencilOut Receives the requested clear value; left untouched when nothing is pending
/// @return true if a clear was pending (the request is cleared by this call)
bool GSTextureMTL::GetResetNeedsStencilClear(int& stencilOut)
{
	if (!m_needs_stencil_clear)
		return false;
	stencilOut = m_clear_stencil;
	m_needs_stencil_clear = false;
	return true;
}
void GSTextureMTL::FlushClears()
{
if (!m_needs_color_clear && !m_needs_depth_clear && !m_needs_stencil_clear)
return;
m_dev->BeginRenderPass(@"Clear",
m_needs_color_clear ? this : nullptr, MTLLoadActionLoad,
m_needs_depth_clear ? this : nullptr, MTLLoadActionLoad,
m_needs_stencil_clear ? this : nullptr, MTLLoadActionLoad);
}
void* GSTextureMTL::GetNativeHandle() const
{
return (__bridge void*)m_texture;
}
/// Drops every pending clear request without performing it.
void GSTextureMTL::InvalidateClears()
{
	m_needs_color_clear = m_needs_depth_clear = m_needs_stencil_clear = false;
}
/// Uploads `data` into region `r` of mip level `layer`.
/// @return false if no upload memory could be mapped, true once the data has been copied into it
bool GSTextureMTL::Update(const GSVector4i& r, const void* data, int pitch, int layer)
{
	void* dst = MapWithPitch(r, pitch, layer);
	if (!dst)
		return false;

	memcpy(dst, data, CalcUploadSize(r.height(), pitch));
	return true;
}
/// Maps upload memory for a region of the texture (the whole texture when `_r` is null).
/// `m.pitch` is computed from the region's width in compressed blocks and is written even when mapping fails;
/// `m.bits` is only written on success.
bool GSTextureMTL::Map(GSMap& m, const GSVector4i* _r, int layer)
{
	GSVector4i rect = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y);

	// Round the width up to whole blocks
	const u32 block = GetCompressedBlockSize();
	const u32 blocks_per_row = (rect.width() + block - 1) / block;
	m.pitch = blocks_per_row * GetCompressedBytesPerBlock();

	void* dst = MapWithPitch(rect, m.pitch, layer);
	if (!dst)
		return false;

	m.bits = static_cast<u8*>(dst);
	return true;
}
/// Allocates upload memory for region `r` of mip level `layer` and schedules the buffer-to-texture copy.
/// The caller fills the returned CPU pointer afterwards; returns nullptr if `layer` is not a valid mip level.
void* GSTextureMTL::MapWithPitch(const GSVector4i& r, int pitch, int layer)
{ @autoreleasepool {
if (layer >= m_mipmap_levels)
return nullptr;
// New data is coming in, so the mipmap flag is reset
m_has_mipmaps = false;
size_t size = CalcUploadSize(r.height(), pitch);
GSDeviceMTL::Map map;
bool needs_clear = false;
if (m_needs_color_clear)
{
// Drop the pending clear; it is re-armed below only if the upload leaves part of the texture uncovered
m_needs_color_clear = false;
// Not uploading to full texture
needs_clear = r.left > 0 || r.top > 0 || r.right < m_size.x || r.bottom < m_size.y;
}
id<MTLBlitCommandEncoder> enc;
// Use the late upload encoder when the current draw already read this texture, or when a clear has to run first
if (m_last_read == m_dev->m_current_draw || needs_clear)
{
if (needs_clear)
{
m_needs_color_clear = true;
m_dev->BeginRenderPass(@"Pre-Upload Clear", this, MTLLoadActionLoad, nullptr, MTLLoadActionDontCare);
}
enc = m_dev->GetLateTextureUploadEncoder();
// NOTE(review): the late path allocates from the vertex upload buffer rather than the texture upload buffer — presumably intentional, confirm
map = m_dev->Allocate(m_dev->m_vertex_upload_buf, size);
}
else
{
enc = m_dev->GetTextureUploadEncoder();
map = m_dev->Allocate(m_dev->m_texture_upload_buf, size);
}
// Copy is scheduled now, won't happen until the encoder is committed so no problems with ordering
[enc copyFromBuffer:map.gpu_buffer
sourceOffset:map.gpu_offset
sourceBytesPerRow:pitch
sourceBytesPerImage:size
sourceSize:MTLSizeMake(r.width(), r.height(), 1)
toTexture:m_texture
destinationSlice:0
destinationLevel:layer
destinationOrigin:MTLOriginMake(r.x, r.y, 0)];
g_perfmon.Put(GSPerfMon::TextureUploads, 1);
return map.cpu_buffer;
}}
// Counterpart of Map(). Intentionally empty: MapWithPitch already scheduled the blit
// when the staging memory was handed out.
void GSTextureMTL::Unmap()
{
// Nothing to do here, upload is already scheduled
}
// Schedules mipmap generation for this texture on the texture upload encoder.
// Skipped when the texture has a single level or m_has_mipmaps is already set.
// NOTE(review): nothing visible here ever sets m_has_mipmaps back to true after generating - confirm whether that is intended.
void GSTextureMTL::GenerateMipmap()
{ @autoreleasepool {
	if (m_mipmap_levels <= 1 || m_has_mipmaps)
		return;

	id<MTLBlitCommandEncoder> blit = m_dev->GetTextureUploadEncoder();
	[blit generateMipmapsForTexture:m_texture];
}}
// Saving the texture to file `fn` is not implemented yet; always returns false.
bool GSTextureMTL::Save(const std::string& fn)
{
// TODO: Implement
return false;
}
// Exchanges the contents of this texture with `other`, which must be a GSTextureMTL
// belonging to the same device (asserted).
// Swaps the base-class state, the Metal texture, the mipmap flag and all pending-clear state.
void GSTextureMTL::Swap(GSTexture* other)
{
	GSTexture::Swap(other);

	GSTextureMTL* const rhs = static_cast<GSTextureMTL*>(other);
	pxAssert(m_dev == rhs->m_dev);

	std::swap(m_texture, rhs->m_texture);
	std::swap(m_has_mipmaps, rhs->m_has_mipmaps);

	// Pending clear flags
	std::swap(m_needs_color_clear, rhs->m_needs_color_clear);
	std::swap(m_needs_depth_clear, rhs->m_needs_depth_clear);
	std::swap(m_needs_stencil_clear, rhs->m_needs_stencil_clear);

	// Pending clear values
	std::swap(m_clear_color, rhs->m_clear_color);
	std::swap(m_clear_depth, rhs->m_clear_depth);
	std::swap(m_clear_stencil, rhs->m_clear_stencil);
}
#endif

View File

@ -0,0 +1,378 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSMTLShaderCommon.h"
using namespace metal;
// Per-vertex input of vs_convert, fetched through the vertex descriptor.
struct ConvertVSIn
{
// Vertex attribute 0: position (passed through to clip space unchanged by vs_convert)
vector_float2 position [[attribute(0)]];
// Vertex attribute 1: texture coordinate
vector_float2 texcoord0 [[attribute(1)]];
};
// Per-vertex input of vs_imgui, fetched through the vertex descriptor.
struct ImGuiVSIn
{
// Vertex attribute 0: position (scaled and offset by vs_imgui)
vector_float2 position [[attribute(0)]];
// Vertex attribute 1: texture coordinate
vector_float2 texcoord0 [[attribute(1)]];
// Vertex attribute 2: vertex color
vector_half4 color [[attribute(2)]];
};
// Data passed from vs_imgui to the ps_imgui* fragment functions.
struct ImGuiShaderData
{
// Clip-space position
float4 p [[position]];
// Texture coordinate
float2 t;
// Vertex color
half4 c;
};
// Fragment argument wrapper around the non-HW texture slot for shaders that
// read texels by pixel coordinate instead of sampling.
template <typename Format>
struct DirectReadTextureIn
{
	texture2d<Format> tex [[texture(GSMTLTextureIndexNonHW)]];

	// Reads the texel at the integer-truncated xy of `pos` (typically the fragment position).
	vec<Format, 4> read(float4 pos)
	{
		const uint2 texel = uint2(pos.xy);
		return tex.read(texel);
	}
};
// Vertex function producing one oversized triangle from the vertex id alone.
// Vertex ids 0, 1, 2 map to clip positions (-1,-1), (3,-1), (-1,3), which covers the whole [-1,1] viewport;
// the matching texture coordinates are (0,1), (2,1), (0,-1).
vertex ConvertShaderData fs_triangle(uint vid [[vertex_id]])
{
	const bool x_far = (vid & 1) != 0;
	const bool y_far = (vid & 2) != 0;

	ConvertShaderData out;
	out.p = float4(x_far ? 3 : -1, y_far ? 3 : -1, 0, 1);
	out.t = float2(x_far ? 2 : 0, y_far ? -1 : 1);
	return out;
}
// Pass-through vertex function: the 2D position becomes (x, y, 0, 1) and the
// texture coordinate is forwarded unchanged.
vertex ConvertShaderData vs_convert(ConvertVSIn in [[stage_in]])
{
	ConvertShaderData result;
	result.t = in.texcoord0;
	result.p = float4(in.position, 0, 1);
	return result;
}
// ImGui vertex function. The position is transformed as position * cb.xy + cb.zw
// (cb.xy = scale, cb.zw = offset); texture coordinate and color are forwarded unchanged.
vertex ImGuiShaderData vs_imgui(ImGuiVSIn in [[stage_in]], constant float4& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
	ImGuiShaderData result;
	result.c = in.color;
	result.t = in.texcoord0;
	result.p = float4(in.position * cb.xy + cb.zw, 0, 1);
	return result;
}
// Applies one of four channel masks to `color`.
// i = 0/1/2 keeps red/green/blue at full strength and halves the other color channels;
// i = 3 keeps all three color channels. Alpha is halved in every case.
float4 ps_crt(float4 color, int i)
{
	constexpr float4 channel_mask[4] =
	{
		float4(1, 0, 0, 0),
		float4(0, 1, 0, 0),
		float4(0, 0, 1, 0),
		float4(1, 1, 1, 0),
	};

	// Masked-in channels saturate to 1, the rest stay at 0.5
	const float4 gain = saturate(channel_mask[i] + 0.5f);
	return color * gain;
}
// Scanline mask: i = 0 keeps rgb at full strength, i = 1 halves rgb.
// Alpha is halved in both cases.
float4 ps_scanlines(float4 color, int i)
{
	constexpr float4 line_mask[2] =
	{
		float4(1, 1, 1, 0),
		float4(0, 0, 0, 0)
	};

	// Masked-in channels saturate to 1, the rest stay at 0.5
	const float4 gain = saturate(line_mask[i] + 0.5f);
	return color * gain;
}
// Plain copy: returns the source sampled at the interpolated texture coordinate.
fragment float4 ps_copy(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float4 color = res.sample(data.t);
	return color;
}
// Packs a sampled RGBA8 color into a 16-bit value:
// red in bits 0-4, green in bits 5-9, blue in bits 10-14, alpha's top bit in bit 15.
fragment ushort ps_convert_rgba8_16bits(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	// Denormalize to 0-255 with rounding
	const uint4 c8 = uint4(res.sample(data.t) * 255.f + 0.5f);

	const uint r5 = c8.x >> 3;
	const uint g5 = (c8.y << 2) & 0x03e0;
	const uint b5 = (c8.z << 7) & 0x7c00;
	const uint a1 = (c8.w << 8) & 0x8000;
	return r5 | g5 | b5 | a1;
}
// Copy without sampling: returns the texel located at the fragment's own pixel position.
fragment float4 ps_copy_fs(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const float4 texel = tex.read(p);
	return texel;
}
// Discards the fragment when the texel's alpha is below 127.5/255; otherwise lets it through.
fragment void ps_datm1(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const float alpha = tex.read(p).a;
	if (alpha < (127.5f / 255.f))
		discard_fragment();
}
// Discards the fragment when the texel's alpha is above 127.5/255; otherwise lets it through.
fragment void ps_datm0(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const float alpha = tex.read(p).a;
	if (alpha > (127.5f / 255.f))
		discard_fragment();
}
// Writes -1 to every channel where the texel's alpha is above 127.5/255,
// and FLT_MAX everywhere else.
fragment float4 ps_primid_init_datm0(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const bool above = tex.read(p).a > (127.5f / 255.f);
	return above ? -1 : FLT_MAX;
}
// Writes -1 to every channel where the texel's alpha is below 127.5/255,
// and FLT_MAX everywhere else.
fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const bool below = tex.read(p).a < (127.5f / 255.f);
	return below ? -1 : FLT_MAX;
}
// Wraps each channel of the texel into the 0-255 range:
// the value is scaled to 0-255 units, rounded, reduced modulo 256 and normalized again.
fragment float4 ps_mod256(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	const float4 value = round(tex.read(p) * 255.f);
	const float4 wrapped = value - 256.f * floor(value / 256.f);
	return wrapped / 255.f;
}
// Scanline filter: applies ps_scanlines with the parity of the fragment's row,
// so odd rows get the darkened mask.
fragment float4 ps_filter_scanlines(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const uint row_parity = uint(data.p.y) % 2;
	return ps_scanlines(res.sample(data.t), row_parity);
}
// Diagonal CRT filter: the emphasized channel (see ps_crt) cycles through R, G, B
// along x and is shifted by one for each successive row.
fragment float4 ps_filter_diagonal(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const uint4 pixel = uint4(data.p);
	const uint channel = (pixel.x + (pixel.y % 3)) % 3;
	return ps_crt(res.sample(data.t), channel);
}
// Returns the sampled color with alpha replaced by a weighted sum of its rgb
// (weights 0.299 / 0.587 / 0.114).
fragment float4 ps_filter_transparency(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float3 luma_weights = float3(0.299f, 0.587f, 0.114f);
	float4 color = res.sample(data.t);
	color.a = dot(color.rgb, luma_weights);
	return color;
}
// Triangular CRT filter: the emphasized channel (see ps_crt) changes every two pixels along x,
// and every other pair of rows is offset by three pixels.
fragment float4 ps_filter_triangular(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const uint4 pixel = uint4(data.p);
	const uint row_offset = ((pixel.y >> 1) & 1) * 3;
	const uint channel = ((pixel.x + row_offset) >> 1) % 3;
	return ps_crt(res.sample(data.t), channel);
}
// "Complex" filter.
// When the output advances more than half a source texel per row, the source is sampled as-is.
// Otherwise the sample is taken at the center of the current source row and modulated by a
// cosine of the position within that row (factor between 0.5 and 1.3).
fragment float4 ps_filter_complex(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float2 texdim = float2(res.texture.get_width(), res.texture.get_height());

	if (dfdy(data.t.y) * texdim.y > 0.5)
		return res.sample(data.t);

	const float factor = (0.9f - 0.4f * cos(2.f * M_PI_F * data.t.y * texdim.y));
	const float row_center = (floor(data.t.y * texdim.y) + 0.5f) / texdim.y;
	return factor * res.sample(float2(data.t.x, row_center));
}
// Converts a sampled depth value to an unsigned integer by scaling it with 2^32.
fragment uint ps_convert_float32_32bits(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res)
{
	const auto scaled = 0x1p32 * res.sample(data.t);
	return uint(scaled);
}
// Converts a sampled depth value to a normalized RGBA color via convert_depth32_rgba8.
fragment float4 ps_convert_float32_rgba8(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res)
{
	const auto depth = res.sample(data.t);
	return convert_depth32_rgba8(depth) / 255.f;
}
// Converts a sampled depth value to a normalized RGBA color via convert_depth16_rgba8.
fragment float4 ps_convert_float16_rgb5a1(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res)
{
	const auto depth = res.sample(data.t);
	return convert_depth16_rgba8(depth) / 255.f;
}
// Fragment output that only writes depth.
// The converting constructor lets fragment functions return a plain float.
struct DepthOut
{
// depth(any): no promise is made about how the written value relates to the rasterized depth
float depth [[depth(any)]];
DepthOut(float depth): depth(depth) {}
};
// Depth copy: writes the sampled depth value as the fragment's depth.
fragment DepthOut ps_depth_copy(ConvertShaderData data [[stage_in]], ConvertPSDepthRes res)
{
	return DepthOut(res.sample(data.t));
}
// Reinterprets a normalized RGBA color as one 32-bit integer (each channel rounded to 8 bits,
// the four bytes bit-cast to uint) and scales it by 2^-32 to produce a depth value.
static float pack_rgba8_depth(float4 unorm)
{
	const uchar4 bytes = uchar4(unorm * 255.f + 0.5f);
	const uint packed = as_type<uint>(bytes);
	return float(packed) * 0x1p-32f;
}
// Converts a sampled RGBA8 color to depth using all four channels (see pack_rgba8_depth).
fragment DepthOut ps_convert_rgba8_float32(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float4 color = res.sample(data.t);
	return pack_rgba8_depth(color);
}
// Converts a sampled RGBA8 color to depth using only rgb;
// the alpha byte is forced to zero (24 bits Z).
fragment DepthOut ps_convert_rgba8_float24(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float3 rgb = res.sample(data.t).rgb;
	return pack_rgba8_depth(float4(rgb, 0));
}
// Converts the red and green channels of a sampled color to a 16-bit integer
// (both rounded to 8 bits and bit-cast to ushort) and scales it by 2^-32 to produce depth.
fragment DepthOut ps_convert_rgba8_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const uchar2 rg = uchar2(res.sample(data.t).rg * 255.f + 0.5f);
	return float(as_type<ushort>(rg)) * 0x1p-32;
}
// Packs a sampled color into 16 bits (red bits 0-4, green 5-9, blue 10-14, alpha's top bit in bit 15)
// and scales the result by 2^-32 to produce depth.
fragment DepthOut ps_convert_rgb5a1_float16(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const uint4 c8 = uint4(res.sample(data.t) * 255.f + 0.5f);

	const uint r5 = c8.x >> 3;
	const uint g5 = (c8.y << 2) & 0x03e0;
	const uint b5 = (c8.z << 7) & 0x7c00;
	const uint a1 = (c8.w << 8) & 0x8000;
	const uint packed = r5 | g5 | b5 | a1;
	return float(packed) * 0x1p-32;
}
// Converts an RGBA texture into an 8-bit indexed layout: each output pixel receives a single
// channel of one source texel, replicated to all four output channels.
// The `uniform` argument is not referenced in the body.
fragment float4 ps_convert_rgba_8i(ConvertShaderData data [[stage_in]], ConvertPSRes res,
constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
// Convert a RGBA texture into a 8 bits packed texture
// Input column: 8x2 RGBA pixels
// 0: 8 RGBA
// 1: 8 RGBA
// Output column: 16x4 Index pixels
// 0: 8 R | 8 B
// 1: 8 R | 8 B
// 2: 8 G | 8 A
// 3: 8 G | 8 A
float c;
// Position of the output pixel inside its 16x16 tile; selects the channel below
uint2 sel = uint2(data.p.xy) % uint2(16, 16);
// Base source coordinate: the output is twice as wide and twice as tall as the source region
uint2 tb = (uint2(data.p.xy) & ~uint2(15, 3)) >> 1;
uint ty = tb.y | (uint(data.p.y) & 1);
// txN: source x in natural order; txH: source x with the two 4-pixel halves swapped
uint txN = tb.x | (uint(data.p.x) & 7);
uint txH = tb.x | ((uint(data.p.x) + 4) & 7);
// Move from native to upscaled texel coordinates
txN *= SCALING_FACTOR.x;
txH *= SCALING_FACTOR.x;
ty *= SCALING_FACTOR.y;
// TODO investigate texture gather
float4 cN = res.texture.read(uint2(txN, ty));
float4 cH = res.texture.read(uint2(txH, ty));
if ((sel.y & 4) == 0)
{
// Column 0 and 2
if ((sel.y & 2) == 0)
{
if ((sel.x & 8) == 0)
c = cN.r;
else
c = cN.b;
}
else
{
if ((sel.x & 8) == 0)
c = cH.g;
else
c = cH.a;
}
}
else
{
// Column 1 and 3
if ((sel.y & 2) == 0)
{
if ((sel.x & 8) == 0)
c = cH.r;
else
c = cH.b;
}
else
{
if ((sel.x & 8) == 0)
c = cN.g;
else
c = cN.a;
}
}
return float4(c);
}
// Computes Y/Cb/Cr from the sampled color and selects the output per uniform:
//   emodc: 0 = source rgb, 1 = (Y, Y, Y), 2 = (Y, Cb, Cr), 3 = source alpha in all channels
//   emoda: 0 = source alpha, 1 = Y, 2 = Y/2, 3 = 0
// Other mode values leave the corresponding output channels unassigned.
fragment float4 ps_yuv(ConvertShaderData data [[stage_in]], ConvertPSRes res,
	constant GSMTLConvertPSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float4 src = res.sample(data.t);

	// Value from GS manual
	const float3x3 rgb2yuv =
	{
		{0.587, -0.311, -0.419},
		{0.114, 0.500, -0.081},
		{0.299, -0.169, 0.500}
	};
	const float3 yuv = rgb2yuv * src.gbr;

	const float Y = 0xDB / 255.f * yuv.x + 0x10 / 255.f;
	const float Cr = 0xE0 / 255.f * yuv.y + 0x80 / 255.f;
	const float Cb = 0xE0 / 255.f * yuv.z + 0x80 / 255.f;

	float4 dst;
	switch (uniform.emodc)
	{
		case 0: dst.rgb = src.rgb; break;
		case 1: dst.rgb = float3(Y); break;
		case 2: dst.rgb = float3(Y, Cb, Cr); break;
		case 3: dst.rgb = float3(src.a); break;
	}
	switch (uniform.emoda)
	{
		case 0: dst.a = src.a; break;
		case 1: dst.a = Y; break;
		case 2: dst.a = Y/2; break;
		case 3: dst.a = 0; break;
	}
	return dst;
}
// ImGui fragment function: vertex color multiplied by the linearly filtered texture color.
fragment half4 ps_imgui(ImGuiShaderData data [[stage_in]], texture2d<half> texture [[texture(GSMTLTextureIndexNonHW)]])
{
	constexpr sampler linear_clamp(coord::normalized, filter::linear, address::clamp_to_edge);
	const half4 texel = texture.sample(linear_clamp, data.t);
	return data.c * texel;
}
// ImGui fragment function for alpha-only use: only the texture's alpha modulates the vertex color,
// rgb is taken from the vertex color unchanged.
fragment half4 ps_imgui_a8(ImGuiShaderData data [[stage_in]], texture2d<half> texture [[texture(GSMTLTextureIndexNonHW)]])
{
	constexpr sampler linear_clamp(coord::normalized, filter::linear, address::clamp_to_edge);
	const half coverage = texture.sample(linear_clamp, data.t).a;
	return data.c * half4(1, 1, 1, coverage);
}
// Brightness / contrast / saturation adjustment of the texel at the fragment position.
// cb.x = brightness multiplier, cb.y = contrast (mix factor away from mid grey),
// cb.z = saturation (mix factor away from the luminance grey). Output alpha is always 1.
fragment float4 ps_shadeboost(float4 p [[position]], DirectReadTextureIn<float> tex, constant float3& cb [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float brt = cb.x;
	const float con = cb.y;
	const float sat = cb.z;

	// Increase or decrease these values to adjust r, g and b color channels separately
	const float AvgLumR = 0.5;
	const float AvgLumG = 0.5;
	const float AvgLumB = 0.5;
	const float3 AvgLumin = float3(AvgLumR, AvgLumG, AvgLumB);

	const float3 LumCoeff = float3(0.2125, 0.7154, 0.0721);

	const float3 brightened = tex.read(p).rgb * brt;
	const float luminance = dot(brightened, LumCoeff);
	const float3 grey = float3(luminance, luminance, luminance);
	const float3 saturated = mix(grey, brightened, sat);
	const float3 contrasted = mix(AvgLumin, saturated, con);
	return float4(contrasted, 1);
}

View File

@ -0,0 +1,10 @@
#include "GSMTLShaderCommon.h"
#include "../../../../bin/resources/shaders/common/fxaa.fx"
// FXAA entry point: samples the source, runs it through PreGammaPass,
// then lets FxaaPass produce the final color.
fragment float4 ps_fxaa(ConvertShaderData data [[stage_in]], texture2d<float> tex [[texture(GSMTLTextureIndexNonHW)]])
{
	const float4 sampled = tex.sample(MAIN_SAMPLER, data.t);
	const float4 gamma_corrected = PreGammaPass(sampled);
	return FxaaPass(gamma_corrected, data.t, tex);
}

View File

@ -0,0 +1,49 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSMTLShaderCommon.h"
using namespace metal;
// Interlace mode 0: discards fragments whose fract(t.y * hH) is below 0.5,
// and returns the sampled source for the rest.
fragment float4 ps_interlace0(ConvertShaderData data [[stage_in]], ConvertPSRes res,
	constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float phase = fract(data.t.y * uniform.hH);
	if (phase - 0.5f < 0.f)
		discard_fragment();

	return res.sample(data.t);
}
// Interlace mode 1: discards fragments whose fract(t.y * hH) is above 0.5,
// and returns the sampled source for the rest.
fragment float4 ps_interlace1(ConvertShaderData data [[stage_in]], ConvertPSRes res,
	constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float phase = fract(data.t.y * uniform.hH);
	if (0.5f - phase < 0.f)
		discard_fragment();

	return res.sample(data.t);
}
// Interlace mode 2: 1-2-1 weighted blend of the samples at t - ZrH, t and t + ZrH.
fragment float4 ps_interlace2(ConvertShaderData data [[stage_in]], ConvertPSRes res,
	constant GSMTLInterlacePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float4 before = res.sample(data.t - uniform.ZrH);
	const float4 center = res.sample(data.t);
	const float4 after = res.sample(data.t + uniform.ZrH);
	return (before + center * 2.f + after) / 4.f;
}
// Interlace mode 3: plain copy of the sampled source.
fragment float4 ps_interlace3(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	const float4 color = res.sample(data.t);
	return color;
}

View File

@ -0,0 +1,34 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSMTLShaderCommon.h"
using namespace metal;
// Merge mode 0: returns the sampled color with its alpha doubled.
fragment float4 ps_merge0(ConvertShaderData data [[stage_in]], ConvertPSRes res)
{
	float4 color = res.sample(data.t);
	color.a = color.a * 2.f;
	return color;
}
// Merge mode 1: returns the sampled rgb with alpha taken from the BGColor uniform.
fragment float4 ps_merge1(ConvertShaderData data [[stage_in]], ConvertPSRes res,
	constant vector_float4& BGColor [[buffer(GSMTLBufferIndexUniforms)]])
{
	const float3 rgb = res.sample(data.t).rgb;
	return float4(rgb, BGColor.a);
}

View File

@ -0,0 +1,939 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "GSMTLShaderCommon.h"
// Pixel format identifiers compared against PS_AEM_FMT and PS_DFMT
constant uint FMT_32 = 0;
constant uint FMT_24 = 1;
constant uint FMT_16 = 2;
// Function constants: each value is supplied by the host when the pipeline is specialized,
// identified by the matching GSMTLConstantIndex_* index.
// Device capability
constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAMEBUFFER_FETCH)]];
// Vertex stage selectors
constant bool FST [[function_constant(GSMTLConstantIndex_FST)]];
constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]];
constant bool VS_POINT_SIZE [[function_constant(GSMTLConstantIndex_VS_POINT_SIZE)]];
// Fragment stage selectors
constant uint PS_AEM_FMT [[function_constant(GSMTLConstantIndex_PS_AEM_FMT)]];
constant uint PS_PAL_FMT [[function_constant(GSMTLConstantIndex_PS_PAL_FMT)]];
constant uint PS_DFMT [[function_constant(GSMTLConstantIndex_PS_DFMT)]];
constant uint PS_DEPTH_FMT [[function_constant(GSMTLConstantIndex_PS_DEPTH_FMT)]];
constant bool PS_AEM [[function_constant(GSMTLConstantIndex_PS_AEM)]];
constant bool PS_FBA [[function_constant(GSMTLConstantIndex_PS_FBA)]];
constant bool PS_FOG [[function_constant(GSMTLConstantIndex_PS_FOG)]];
constant uint PS_DATE [[function_constant(GSMTLConstantIndex_PS_DATE)]];
constant uint PS_ATST [[function_constant(GSMTLConstantIndex_PS_ATST)]];
constant uint PS_TFX [[function_constant(GSMTLConstantIndex_PS_TFX)]];
constant bool PS_TCC [[function_constant(GSMTLConstantIndex_PS_TCC)]];
constant uint PS_WMS [[function_constant(GSMTLConstantIndex_PS_WMS)]];
constant uint PS_WMT [[function_constant(GSMTLConstantIndex_PS_WMT)]];
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]];
constant uint PS_BLEND_A [[function_constant(GSMTLConstantIndex_PS_BLEND_A)]];
constant uint PS_BLEND_B [[function_constant(GSMTLConstantIndex_PS_BLEND_B)]];
constant uint PS_BLEND_C [[function_constant(GSMTLConstantIndex_PS_BLEND_C)]];
constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BLEND_D)]];
constant uint PS_CLR_HW [[function_constant(GSMTLConstantIndex_PS_CLR_HW)]];
constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]];
constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]];
constant bool PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
constant bool PS_PABE [[function_constant(GSMTLConstantIndex_PS_PABE)]];
constant bool PS_NO_COLOR [[function_constant(GSMTLConstantIndex_PS_NO_COLOR)]];
constant bool PS_NO_COLOR1 [[function_constant(GSMTLConstantIndex_PS_NO_COLOR1)]];
constant bool PS_ONLY_ALPHA [[function_constant(GSMTLConstantIndex_PS_ONLY_ALPHA)]];
constant uint PS_CHANNEL [[function_constant(GSMTLConstantIndex_PS_CHANNEL)]];
constant uint PS_DITHER [[function_constant(GSMTLConstantIndex_PS_DITHER)]];
constant bool PS_ZCLAMP [[function_constant(GSMTLConstantIndex_PS_ZCLAMP)]];
constant bool PS_TCOFFSETHACK [[function_constant(GSMTLConstantIndex_PS_TCOFFSETHACK)]];
constant bool PS_URBAN_CHAOS_HLE [[function_constant(GSMTLConstantIndex_PS_URBAN_CHAOS_HLE)]];
constant bool PS_TALES_OF_ABYSS_HLE [[function_constant(GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE)]];
constant bool PS_TEX_IS_FB [[function_constant(GSMTLConstantIndex_PS_TEX_IS_FB)]];
constant bool PS_AUTOMATIC_LOD [[function_constant(GSMTLConstantIndex_PS_AUTOMATIC_LOD)]];
constant bool PS_MANUAL_LOD [[function_constant(GSMTLConstantIndex_PS_MANUAL_LOD)]];
constant bool PS_POINT_SAMPLER [[function_constant(GSMTLConstantIndex_PS_POINT_SAMPLER)]];
constant bool PS_INVALID_TEX0 [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]];
constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]];
// Compile-time feature switches derived from the target platform and Metal language version.
// PRIMID_SUPPORT: enabled only on macOS with Metal 2.2 or newer
#if defined(__METAL_MACOS__) && __METAL_VERSION__ >= 220
#define PRIMID_SUPPORT 1
#else
#define PRIMID_SUPPORT 0
#endif
// FBFETCH_SUPPORT: enabled on iOS, or anywhere with Metal 2.3 or newer
#if defined(__METAL_IOS__) || __METAL_VERSION__ >= 230
#define FBFETCH_SUPPORT 1
#else
#define FBFETCH_SUPPORT 0
#endif
// Values derived from the function constants above.
// PS_DATE 1 and 2 select the "init" variant, 3 selects the "read" variant
constant bool PS_PRIM_CHECKING_INIT = PS_DATE == 1 || PS_DATE == 2;
constant bool PS_PRIM_CHECKING_READ = PS_DATE == 3;
#if PRIMID_SUPPORT
constant bool NEEDS_PRIMID = PS_PRIM_CHECKING_INIT || PS_PRIM_CHECKING_READ;
#endif
// The bound texture is a depth texture for the two HLE modes and for depth formats 1 and 2
constant bool PS_TEX_IS_DEPTH = PS_URBAN_CHAOS_HLE || PS_TALES_OF_ABYSS_HLE || PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2;
constant bool PS_TEX_IS_COLOR = !PS_TEX_IS_DEPTH;
// A palette is needed for paletted formats and for channel fetch modes 1-5
constant bool PS_HAS_PALETTE = PS_PAL_FMT != 0 || (PS_CHANNEL >= 1 && PS_CHANNEL <= 5);
// Negated copy of IIP so it can be used in a function_constant() attribute
constant bool NOT_IIP = !IIP;
constant bool SW_BLEND = (PS_BLEND_A != PS_BLEND_B) || PS_BLEND_D;
constant bool SW_AD_TO_HW = PS_BLEND_C == 1 && PS_CLR_HW > 3;
// Selector value 1 in the blend equation is what requires reading the render target
constant bool NEEDS_RT_FOR_BLEND = (((PS_BLEND_A != PS_BLEND_B) && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1)) || PS_BLEND_D == 1 || SW_AD_TO_HW);
constant bool NEEDS_RT_EARLY = PS_TEX_IS_FB || PS_DATE >= 5;
constant bool NEEDS_RT = NEEDS_RT_EARLY || (!PS_PRIM_CHECKING_INIT && (PS_FBMASK || NEEDS_RT_FOR_BLEND));
// Negated copies of PS_NO_COLOR / PS_NO_COLOR1 for use in function_constant() attributes
constant bool PS_COLOR0 = !PS_NO_COLOR;
constant bool PS_COLOR1 = !PS_NO_COLOR1;
// Per-vertex input of vs_main; each member is bound to a GSMTLAttributeIndex* vertex attribute.
struct MainVSIn
{
// Float texture coordinate
float2 st [[attribute(GSMTLAttributeIndexST)]];
// Vertex color
float4 c [[attribute(GSMTLAttributeIndexC)]];
// Written to the w component of the float texture coordinate output
float q [[attribute(GSMTLAttributeIndexQ)]];
// Integer vertex position
uint2 p [[attribute(GSMTLAttributeIndexXY)]];
// Integer depth
uint z [[attribute(GSMTLAttributeIndexZ)]];
// Integer texture coordinate
uint2 uv [[attribute(GSMTLAttributeIndexUV)]];
// Only f.x is read (fog factor)
float4 f [[attribute(GSMTLAttributeIndexF)]];
};
// Output of vs_main.
struct MainVSOut
{
float4 p [[position]];
// xy: float texture coordinate, z: fog factor, w: q
float4 t;
// xy: normalized integer texture coordinate, zw: see texture_coord()
float4 ti;
// Interpolated color, present only when IIP is set
float4 c [[function_constant(IIP)]];
// Flat (non-interpolated) color, present only when IIP is not set
float4 fc [[flat, function_constant(NOT_IIP)]];
// Present only when VS_POINT_SIZE is set
float point_size [[point_size, function_constant(VS_POINT_SIZE)]];
};
// Fragment-stage view of MainVSOut (same members, without point_size).
struct MainPSIn
{
float4 p [[position]];
// xy: float texture coordinate, z: fog factor, w: q
float4 t;
// xy: normalized integer texture coordinate, zw: see texture_coord()
float4 ti;
// Interpolated color, present only when IIP is set
float4 c [[function_constant(IIP)]];
// Flat (non-interpolated) color, present only when IIP is not set
float4 fc [[flat, function_constant(NOT_IIP)]];
};
// Fragment outputs of the main pixel shader.
struct MainPSOut
{
// Both colors target attachment 0; index(0)/index(1) are the two dual-source blending inputs
float4 c0 [[color(0), index(0), function_constant(PS_COLOR0)]];
float4 c1 [[color(0), index(1), function_constant(PS_COLOR1)]];
// Depth is written only when PS_ZCLAMP is set
// NOTE(review): depth(less) presumably promises the written value never exceeds the rasterized depth - confirm
float depth [[depth(less), function_constant(PS_ZCLAMP)]];
};
// MARK: - Vertex functions
// Fills the texture coordinate outputs (out.t.xy, out.t.w and out.ti) from the vertex input.
// Both input coordinates are first offset by cb.texture_offset.
static void texture_coord(thread const MainVSIn& v, thread MainVSOut& out, constant GSMTLMainVSUniform& cb)
{
	const float2 st = v.st - cb.texture_offset;
	const float2 uv = float2(v.uv) - cb.texture_offset;

	// Float coordinate
	out.t.w = v.q;
	out.t.xy = st;

	// Integer coordinate => normalized
	out.ti.xy = uv * cb.texture_scale;

	if (!FST)
	{
		// Some games use float coordinates for post-processing effects
		out.ti.zw = st / cb.texture_scale;
	}
	else
	{
		// Integer coordinate => integral
		out.ti.zw = uv;
	}
}
// Builds the complete vertex output: position, texture coordinates, color, fog and (optionally) point size.
static MainVSOut vs_main_run(thread const MainVSIn& v, constant GSMTLMainVSUniform& cb)
{
	constexpr float exp_min32 = 0x1p-32;
	MainVSOut out;

	// Position: small bias, then scale/offset with the y axis flipped
	const float2 biased = float2(v.p) - float2(0.05, 0.05);
	out.p.xy = biased * float2(cb.vertex_scale.x, -cb.vertex_scale.y) - float2(cb.vertex_offset.x, -cb.vertex_offset.y);

	// Clamp to max depth, gs doesn't wrap
	const uint z = min(v.z, cb.max_depth);
	out.p.z = float(z) * exp_min32;
	out.p.w = 1;

	texture_coord(v, out, cb);

	// Only one of c / fc exists, depending on IIP
	if (IIP)
		out.c = v.c;
	else
		out.fc = v.c;

	out.t.z = v.f.x; // pack fog with texture

	if (VS_POINT_SIZE)
		out.point_size = SCALING_FACTOR.x;

	return out;
}
// Vertex entry point: forwards the stage input and the HW uniform buffer to vs_main_run.
vertex MainVSOut vs_main(MainVSIn v [[stage_in]], constant GSMTLMainVSUniform& cb [[buffer(GSMTLBufferIndexHWUniforms)]])
{
	const MainVSOut result = vs_main_run(v, cb);
	return result;
}
// MARK: - Fragment functions
// Sampler used for palette lookups: nearest filtering, clamped to the edge
constexpr sampler palette_sampler(filter::nearest, address::clamp_to_edge);
struct PSMain
{
texture2d<float> tex;
depth2d<float> tex_depth;
texture2d<float> palette;
texture2d<float> prim_id_tex;
sampler tex_sampler;
float4 current_color;
uint prim_id;
const thread MainPSIn& in;
constant GSMTLMainPSUniform& cb;
// Binds the fragment input and the pixel-shader uniforms by reference.
// The texture, sampler, current_color and prim_id members are not initialized here.
PSMain(const thread MainPSIn& in, constant GSMTLMainPSUniform& cb): in(in), cb(cb) {}
// Samples whichever texture is active, forwarding all arguments:
// the depth texture (value replicated to four channels) when PS_TEX_IS_DEPTH, else the color texture.
template <typename... Args>
float4 sample_tex(Args... args)
{
	if (!PS_TEX_IS_DEPTH)
		return tex.sample(args...);

	return float4(tex_depth.sample(args...));
}
// Samples the source texture at normalized coordinate `uv`.
// Returns current_color instead when the texture is the framebuffer (PS_TEX_IS_FB).
// The LOD is automatic, computed manually from q, or fixed at level 0, depending on the LOD constants.
float4 sample_c(float2 uv)
{
	if (PS_TEX_IS_FB)
		return current_color;

	if (PS_POINT_SAMPLER)
	{
		// Weird issue with ATI/AMD cards,
		// it looks like they add 127/128 of a texel to sampling coordinates
		// occasionally causing point sampling to erroneously round up.
		// I'm manually adjusting coordinates to the centre of texels here,
		// though the centre is just paranoia, the top left corner works fine.
		// As of 2018 this issue is still present.
		uv = (trunc(uv * cb.wh.zw) + 0.5) / cb.wh.zw;
	}
	uv *= cb.st_scale;

	if (PS_AUTOMATIC_LOD)
		return sample_tex(tex_sampler, uv);

	if (!PS_MANUAL_LOD)
		return sample_tex(tex_sampler, uv, level(0));

	// Manual LOD: the LOD parameters are carried in uv_min_max
	const float K = cb.uv_min_max.x;
	const float L = cb.uv_min_max.y;
	const float bias = cb.uv_min_max.z;
	const float max_lod = cb.uv_min_max.w;

	const float gs_lod = K - log2(abs(in.t.w)) * L;
	// FIXME max useful ?
	//float lod = max(min(gs_lod, max_lod) - bias, 0.f);
	const float lod = min(gs_lod, max_lod) - bias;

	return sample_tex(tex_sampler, uv, level(lod));
}
// Looks up the palette entry at normalized index `idx` (first row, nearest filtering).
float4 sample_p(float idx)
{
	const float2 coord = float2(idx, 0);
	return palette.sample(palette_sampler, coord);
}
// Applies the region clamp (mode 2) or region repeat (mode 3) addressing selected by
// PS_WMS (x and z components) and PS_WMT (y and w components) to two packed coordinates.
// Other mode values leave the affected components unchanged.
float4 clamp_wrap_uv(float4 uv)
{
float4 uv_out = uv;
float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy;
// Same mode on both axes: handle all four components at once
if (PS_WMS == PS_WMT)
{
if (PS_WMS == 2)
{
uv_out = clamp(uv, cb.uv_min_max.xyxy, cb.uv_min_max.zwzw);
}
else if (PS_WMS == 3)
{
// wrap negative uv coords to avoid an off by one error that shifted
// textures. Fixes Xenosaga's hair issue.
if (!FST)
uv = fract(uv);
// Region repeat works on integer texel coordinates: (texel & mask) | fix
uv_out = float4((ushort4(uv * tex_size) & ushort4(cb.uv_msk_fix.xyxy)) | ushort4(cb.uv_msk_fix.zwzw)) / tex_size;
}
}
else
{
// Different modes per axis: horizontal components (x, z) first
if (PS_WMS == 2)
{
uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz);
}
else if (PS_WMS == 3)
{
if (!FST)
uv.xz = fract(uv.xz);
uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx;
}
// Then vertical components (y, w)
if (PS_WMT == 2)
{
uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww);
}
else if (PS_WMT == 3)
{
if (!FST)
uv.yw = fract(uv.yw);
uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy;
}
}
return uv_out;
}
// Samples the four texels of a bilinear footprint.
// `uv` packs two corners: (x, y) and (z, w). The columns of the result are the samples at
// (x,y), (z,y), (x,w) and (z,w), in that order.
float4x4 sample_4c(float4 uv)
{
	float4x4 texels;
	texels[0] = sample_c(uv.xy);
	texels[1] = sample_c(uv.zy);
	texels[2] = sample_c(uv.xw);
	texels[3] = sample_c(uv.zw);
	return texels;
}
// Fetches the palette indices (stored in alpha) of the four texels of a bilinear footprint.
// PS_PAL_FMT 1 keeps only the low 4 bits of each index, 2 only the high 4 bits;
// any other value returns the normalized alpha unchanged.
float4 sample_4_index(float4 uv)
{
	// Either GS will send a texture that contains a single alpha channel
	// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel

	// Note: texture gather can't be used because of special clamping/wrapping
	// Also it doesn't support lod
	float4 alpha;
	alpha.x = sample_c(uv.xy).a;
	alpha.y = sample_c(uv.zy).a;
	alpha.z = sample_c(uv.xw).a;
	alpha.w = sample_c(uv.zw).a;

	if (PS_PAL_FMT != 1 && PS_PAL_FMT != 2)
	{
		// Most textures will hit this code so keep normalized float value
		return alpha;
	}

	const uchar4 index = uchar4(alpha * 255.5f); // Denormalize value
	if (PS_PAL_FMT == 1)
		return float4(index & 0xF) / 255.f;

	return float4(index >> 4) / 255.f;
}
// Looks up four palette entries; column i of the result is the entry for component i of `u`.
float4x4 sample_4p(float4 u)
{
	float4x4 colors;
	colors[0] = sample_p(u.x);
	colors[1] = sample_p(u.y);
	colors[2] = sample_p(u.z);
	colors[3] = sample_p(u.w);
	return colors;
}
// Reads the depth texture at the fragment's pixel position and scales it by 2^32 to an integer.
uint fetch_raw_depth()
{
	const ushort2 pixel = ushort2(in.p.xy);
	return tex_depth.read(pixel) * 0x1p32f;
}
// Returns the color at the fragment's pixel position: current_color when the texture is the
// framebuffer (PS_TEX_IS_FB), otherwise a direct read of the color texture.
float4 fetch_raw_color()
{
	if (!PS_TEX_IS_FB)
		return tex.read(ushort2(in.p.xy));

	return current_color;
}
// Reads the texel at integer coordinate `uv` from the active texture.
// A depth read is replicated to all four channels.
float4 fetch_c(ushort2 uv)
{
	if (PS_TEX_IS_DEPTH)
		return float4(tex_depth.read(uv));

	return tex.read(uv);
}
// MARK: Depth sampling
// Integer counterpart of clamp_wrap_uv for depth sampling: applies region clamp (mode 2) or
// region repeat (mode 3) per PS_WMS / PS_WMT. Other mode values leave the coordinate unchanged.
ushort2 clamp_wrap_uv_depth(ushort2 uv)
{
ushort2 uv_out = uv;
// Keep the full precision
// It allows multiplying by the ScalingFactor before applying the 1/16 coefficient
// (the mask is shifted left by 4 to match coordinates that carry 4 fractional bits)
ushort4 mask = ushort4(cb.uv_msk_fix) << 4;
// Same mode on both axes: handle x and y together
if (PS_WMS == PS_WMT)
{
if (PS_WMS == 2)
uv_out = clamp(uv, mask.xy, mask.zw);
else if (PS_WMS == 3)
uv_out = (uv & mask.xy) | mask.zw;
}
else
{
// Horizontal axis
if (PS_WMS == 2)
uv_out.x = clamp(uv.x, mask.x, mask.z);
else if (PS_WMS == 3)
uv_out.x = (uv.x & mask.x) | mask.z;
// Vertical axis
if (PS_WMT == 2)
uv_out.y = clamp(uv.y, mask.y, mask.w);
else if (PS_WMT == 3)
uv_out.y = (uv.y & mask.y) | mask.w;
}
return uv_out;
}
// Produces a texel (in 0-255 units) from a depth source, according to the HLE flags and PS_DEPTH_FMT,
// then applies the alpha expansion selected by PS_AEM_FMT.
// `st` is the integer texture coordinate (it is scaled by 1/16 after clamping/wrapping).
float4 sample_depth(float2 st)
{
// Clamp/wrap at full precision, then convert to texel coordinates of the upscaled texture
float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f));
ushort2 uv = ushort2(uv_f);
float4 t = float4(0);
if (PS_TALES_OF_ABYSS_HLE)
{
// Warning: UV can't be used in channel effect
// Only the low 16 bits of the raw depth are kept here
ushort depth = fetch_raw_depth();
// Convert msb based on the palette
t = palette.read(ushort2((depth >> 8) & 0xFF, 0)) * 255.f;
}
else if (PS_URBAN_CHAOS_HLE)
{
// Depth buffer is read as a RGB5A1 texture. The game tries to extract the green channel.
// So it will do a first channel trick to extract lsb, value is right-shifted.
// Then a new channel trick to extract msb which will be shifted to the left.
// OpenGL uses a FLOAT32 format for the depth so it requires a couple of conversions.
// To be faster both steps (msb&lsb) are done in a single pass.
// Warning: UV can't be used in channel effect
// Only the low 16 bits of the raw depth are kept here
ushort depth = fetch_raw_depth();
// Convert lsb based on the palette
t = palette.read(ushort2(depth & 0xFF, 0)) * 255.f;
// Msb is easier
float green = float((depth >> 8) & 0xFF) * 36.f;
green = min(green, 255.0f);
t.g += green;
}
else if (PS_DEPTH_FMT == 1)
{
t = convert_depth32_rgba8(fetch_c(uv).r);
}
else if (PS_DEPTH_FMT == 2)
{
t = convert_depth16_rgba8(fetch_c(uv).r);
}
else if (PS_DEPTH_FMT == 3)
{
t = fetch_c(uv) * 255.f;
}
// Alpha expansion: 24-bit formats take alpha from ta.x (or 0 for black texels when PS_AEM is set);
// 16-bit formats pick ta.y when the alpha bit is set, otherwise behave like the 24-bit case
if (PS_AEM_FMT == FMT_24)
t.a = (!PS_AEM || any(bool3(t.rgb))) ? 255.f * cb.ta.x : 0.f;
else if (PS_AEM_FMT == FMT_16)
t.a = t.a >= 128.f ? 255.f * cb.ta.y : (!PS_AEM || any(bool3(t.rgb))) ? 255.f * cb.ta.x : 0.f;
return t;
}
// MARK: Fetch a Single Channel
// Channel fetch: uses the red channel (bits 0-7 of the raw depth for depth textures)
// as a palette index and returns the palette entry in 0-255 units.
float4 fetch_red()
{
	float index;
	if (PS_TEX_IS_DEPTH)
		index = float(fetch_raw_depth() & 0xFF) / 255.f;
	else
		index = fetch_raw_color().r;

	return sample_p(index) * 255.f;
}
// Channel fetch: uses the green channel (bits 8-15 of the raw depth for depth textures)
// as a palette index and returns the palette entry in 0-255 units.
float4 fetch_green()
{
	float index;
	if (PS_TEX_IS_DEPTH)
		index = float((fetch_raw_depth() >> 8) & 0xFF) / 255.f;
	else
		index = fetch_raw_color().g;

	return sample_p(index) * 255.f;
}
// Channel fetch: uses the blue channel (bits 16-23 of the raw depth for depth textures)
// as a palette index and returns the palette entry in 0-255 units.
float4 fetch_blue()
{
	float index;
	if (PS_TEX_IS_DEPTH)
		index = float((fetch_raw_depth() >> 16) & 0xFF) / 255.f;
	else
		index = fetch_raw_color().b;

	return sample_p(index) * 255.f;
}
// Channel fetch: uses the alpha channel of the raw color as a palette index
// and returns the palette entry in 0-255 units.
float4 fetch_alpha()
{
	const float index = fetch_raw_color().a;
	return sample_p(index) * 255.f;
}
// Channel fetch: runs each of r, g and b through the palette independently (keeping the
// matching channel of each palette entry) and returns the result in 0-255 units with alpha 255.
float4 fetch_rgb()
{
	const float4 raw = fetch_raw_color();
	const float red = sample_p(raw.r).r;
	const float green = sample_p(raw.g).g;
	const float blue = sample_p(raw.b).b;
	return float4(red, green, blue, 1) * 255.f;
}
// Channel fetch that builds one 8-bit value from the upper bits of green and the lower bits of blue,
// replicated to all four channels (0-255 units).
// Depth textures: green and blue are adjacent bytes of the raw depth (bits 8-23), so a single
// right shift by (8 + green_shift) yields the combined value.
// Color textures: green is shifted down into the low bits and blue is shifted UP into the high
// bits, each masked with its own mask, which matches what the depth path produces.
float4 fetch_gXbY()
{
	if (PS_TEX_IS_DEPTH)
	{
		uint depth = fetch_raw_depth();
		uint bg = (depth >> (8 + cb.channel_shuffle.green_shift)) & 0xFF;
		return float4(bg);
	}
	else
	{
		uchar4 rt = uchar4(fetch_raw_color() * 255.5f); // Denormalize value
		uchar green = (rt.g >> cb.channel_shuffle.green_shift) & cb.channel_shuffle.green_mask;
		// Blue must be shifted left: shifting it right would throw away the bits blue_mask selects
		uchar blue = (rt.b << cb.channel_shuffle.blue_shift) & cb.channel_shuffle.blue_mask;
		return float4(green | blue);
	}
}
// Samples the color texture at normalized coordinate `st` and returns the texel in 0-255 units.
// Handles the optional coordinate offset hack, manual bilinear filtering (PS_LTF), custom
// clamp/wrap modes, palette lookup and alpha expansion (PS_AEM_FMT).
float4 sample_color(float2 st)
{
if (PS_TCOFFSETHACK)
st += cb.tc_offset;
float4 t;
float4x4 c;
float2 dd;
// Fast path: a single sample is enough, only c[0] is filled in
if (!PS_LTF && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
{
c[0] = sample_c(st);
}
else
{
float4 uv;
if (PS_LTF)
{
uv = st.xyxy + cb.half_texel;
// Interpolation weights for the manual bilinear filter
dd = fract(uv.xy * cb.wh.zw);
if (!FST)
{
// Background in Shin Megami Tensei Lucifers
// I suspect that uv isn't a standard number, so fract is outside of the [0;1] range
dd = saturate(dd);
}
}
else
{
uv = st.xyxy;
}
uv = clamp_wrap_uv(uv);
if (PS_PAL_FMT != 0)
c = sample_4p(sample_4_index(uv));
else
c = sample_4c(uv);
}
// Alpha expansion; with FMT_32 neither branch runs, so this loop does nothing on the fast path
for (int i = 0; i < 4; i++)
{
if (PS_AEM_FMT == FMT_24)
c[i].a = !PS_AEM || any(bool3(c[i].rgb)) ? cb.ta.x : 0.f;
else if (PS_AEM_FMT == FMT_16)
c[i].a = c[i].a >= 0.5 ? cb.ta.y : !PS_AEM || any(bool3(c[i].rgb)) ? cb.ta.x : 0.f;
}
if (PS_LTF)
t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);
else
t = c[0];
// The 0.05f helps to fix the overbloom of sotc
// I think the issue is related to the rounding of the texture coordinate. The linear (from fixed unit)
// interpolation could be slightly below the correct one.
return trunc(t * 255.f + 0.05f);
}
// Combines texel T with vertex color C (both in 0-255 units) according to PS_TFX:
//   0: C * T / 128          1: T
//   2: rgb = C * T / 128 + C.a, a = T.a + C.a
//   3: rgb = C * T / 128 + C.a, a = T.a
//   other: C
// Without PS_TCC the alpha always comes from C. Modes 0, 2 and 3 are clamped to 255.
float4 tfx(float4 T, float4 C)
{
	const float4 FxT = trunc(trunc(C) * T / 128.f);

	float4 C_out;
	switch (PS_TFX)
	{
		case 0:
			C_out = FxT;
			break;
		case 1:
			C_out = T;
			break;
		case 2:
			C_out = float4(FxT.rgb, T.a) + C.a;
			break;
		case 3:
			C_out = float4(FxT.rgb + C.a, T.a);
			break;
		default:
			C_out = C;
			break;
	}

	if (!PS_TCC)
		C_out.a = C.a;

	// Clamp only when it is useful
	const bool can_overflow = PS_TFX == 0 || PS_TFX == 2 || PS_TFX == 3;
	if (can_overflow)
		C_out = min(C_out, 255.f);

	return C_out;
}
// Alpha test. Returns true when the fragment passes, false when it must be rejected.
// PS_ATST: 1 rejects a > aref, 2 rejects a < aref, 3 rejects |a - aref| > 0.5,
// 4 rejects |a - aref| < 0.5; every other value always passes.
bool atst(float4 C)
{
	const float a = C.a;

	if (PS_ATST == 1)
		return !(a > cb.aref);
	if (PS_ATST == 2)
		return !(a < cb.aref);
	if (PS_ATST == 3)
		return !(abs(a - cb.aref) > 0.5f);
	if (PS_ATST == 4)
		return !(abs(a - cb.aref) < 0.5f);

	// Nothing to do
	return true;
}
// When PS_FOG is set, blends C.rgb with the fog color: f = 0 gives pure fog color,
// f = 1 leaves the color unchanged. The result is truncated. Alpha is untouched.
void fog(thread float4& C, float f)
{
	if (!PS_FOG)
		return;

	C.rgb = trunc(mix(cb.fog_color, C.rgb, f));
}
// Computes the fragment color before blending: texel fetch (channel fetch, depth sampling or
// color sampling), combination with the vertex color, alpha test (discard on failure) and fog.
float4 ps_color()
{
	float2 st, st_int;
	if (FST)
	{
		// Note: xy are normalized coordinates
		st = in.ti.xy;
		st_int = in.ti.zw;
	}
	else if (PS_INVALID_TEX0)
	{
		// st_int is not set on this path
		st = (in.t.xy * cb.wh.xy) / (in.t.w * cb.wh.zw);
	}
	else
	{
		// Perspective divide by q
		st = in.t.xy / in.t.w;
		st_int = in.ti.zw / in.t.w;
	}

	float4 T;
	switch (PS_CHANNEL)
	{
		case 1:
			T = fetch_red();
			break;
		case 2:
			T = fetch_green();
			break;
		case 3:
			T = fetch_blue();
			break;
		case 4:
			T = fetch_alpha();
			break;
		case 5:
			T = fetch_rgb();
			break;
		case 6:
			T = fetch_gXbY();
			break;
		default:
			if (PS_DEPTH_FMT != 0)
				T = sample_depth(st_int);
			else
				T = sample_color(st);
			break;
	}

	// Interpolated or flat vertex color, depending on IIP
	float4 C = tfx(T, IIP ? in.c : in.fc);

	if (!atst(C))
		discard_fragment();

	fog(C, in.t.z);
	return C;
}
// Applies the frame buffer write mask in place when PS_FBMASK is enabled:
// bits set in cb.fbmask come from the current render target color, the rest from C.
void ps_fbmask(thread float4& C)
{
	if (!PS_FBMASK)
		return;

	uint4 src_bits = uint4(C);
	// current_color is scaled by 255.5 before conversion to integer
	uint4 dst_bits = uint4(current_color * 255.5);
	C = float4((src_bits & ~cb.fbmask) | (dst_bits & cb.fbmask));
}
// Adds the dither offset for this pixel to C.rgb when PS_DITHER is non-zero.
// With PS_DITHER == 2 the position is used directly; otherwise it is first divided
// by SCALING_FACTOR. The offset comes from a 4x4 table indexed by the low 2 bits of y and x.
void ps_dither(thread float4& C)
{
	if (PS_DITHER != 0)
	{
		float2 pixel = in.p.xy;
		if (PS_DITHER != 2)
			pixel = pixel / float2(SCALING_FACTOR);

		ushort2 cell = ushort2(pixel);
		C.rgb += cb.dither_matrix[cell.y & 3][cell.x & 3];
	}
}
// Brings C.rgb back into the range of the output format after software blending / dithering.
// Nothing is done unless SW_BLEND or PS_DITHER is active.
void ps_color_clamp_wrap(thread float4& C)
{
	// When dithering, the bottom 3 bits become meaningless and cause lines in the picture,
	// so the color depth has to be limited on dithered items as well
	const bool needs_fixup = SW_BLEND || PS_DITHER != 0;
	if (!needs_fixup)
		return;

	// Standard clamp, used when neither color clip (wrap) nor HDR is requested
	const bool standard_clamp = !PS_COLCLIP && !PS_HDR;
	if (standard_clamp)
		C.rgb = clamp(C.rgb, 0.f, 255.f);

	// FIXME: rounding of negative floats? The compiler uses trunc but it might need floor.
	// Warning: normally the blending equation is mult(A, B) = A * B >> 7, while the GPU has full accuracy
	// GS: Color = 1, Alpha = 255 => output 1
	// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
	const bool mask_to_5_bits = PS_DFMT == FMT_16 && (PS_HDR || !PS_BLEND_MIX);
	const bool wrap_to_8_bits = PS_COLCLIP && !PS_HDR;
	if (mask_to_5_bits)
	{
		// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
		C.rgb = float3(short3(C.rgb) & 0xF8);
	}
	else if (wrap_to_8_bits)
	{
		// Color clip: keep only the low 8 bits so the value wraps
		C.rgb = float3(short3(C.rgb) & 0xFF);
	}
}
// Three-way selector: returns `zero` for selector 0, `one` for selector 1,
// and `two` for every other selector value.
template <typename T>
static T pick(uint selector, T zero, T one, T two)
{
	if (selector == 0)
		return zero;
	if (selector == 1)
		return one;
	return two;
}
// Blend stage, modifying Color.rgb in place. As is the source alpha blend factor.
// With SW_BLEND the whole (A - B) * C + D equation is evaluated here against the current
// render target color; otherwise only the source color is pre-adjusted according to PS_CLR_HW.
void ps_blend(thread float4& Color, float As)
{
	if (!SW_BLEND)
	{
		// Needed for Cd * (As/Ad/F + 1) blending modes
		switch (PS_CLR_HW)
		{
			case 1:
			case 5:
				Color.rgb = 255.f;
				break;
			case 2:
			case 4:
			{
				float factor = PS_BLEND_C == 2 ? cb.alpha_fix : As;
				Color.rgb = saturate(factor - 1.f) * 255.f;
				break;
			}
			case 3:
				// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
				// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
				Color.rgb *= (255.f / 128.f);
				break;
			default:
				break;
		}
		return;
	}

	// Source and destination terms; the destination is scaled to 0-255 and truncated
	float3 src = Color.rgb;
	float3 dst = trunc(current_color.rgb * 255.5f);
	float dst_alpha;
	if (PS_DFMT == FMT_24)
		dst_alpha = 1.f;
	else
		dst_alpha = trunc(current_color.a * 255.5f) / 128.f;

	// Each selector picks source (0), destination (1) or the third option (2)
	float3 term_a = pick(PS_BLEND_A, src, dst, float3(0.f));
	float3 term_b = pick(PS_BLEND_B, src, dst, float3(0.f));
	float factor = pick(PS_BLEND_C, As, dst_alpha, cb.alpha_fix);
	float3 term_d = pick(PS_BLEND_D, src, dst, float3(0.f));

	if (PS_BLEND_MIX)
		factor = min(factor, 1.f);

	// A == B makes the (A - B) * C part vanish, leaving D untouched
	if (PS_BLEND_A == PS_BLEND_B)
		Color.rgb = term_d;
	else
		Color.rgb = trunc((term_a - term_b) * factor + term_d);

	// With PS_PABE the blended result is kept only when As >= 1, otherwise the source color is restored
	if (PS_PABE)
	{
		if (!(As >= 1.f))
			Color.rgb = src;
	}
}
// Runs the full pixel pipeline for one fragment and returns the filled-in outputs:
// scanline mask and destination alpha tests (which may discard), color generation,
// channel shuffle, alpha correction, blending, dithering, clamp/wrap and frame buffer masking.
MainPSOut ps_main()
{
	MainPSOut out = {};

	// Scanline mask: bit 1 enables it, bit 0 is the row parity that gets dropped
	if ((PS_SCANMSK & 2) && (uint(in.p.y) & 1) == (PS_SCANMSK & 1))
		discard_fragment();

	// Destination alpha test against the current render target value
	if (PS_DATE >= 5)
	{
		// 1 => DATM == 0, 2 => DATM == 1
		const float rt_a = PS_WRITE_RG ? current_color.g : current_color.a;
		bool bad;
		if ((PS_DATE & 3) == 1)
			bad = rt_a > 0.5;
		else
			bad = rt_a < 0.5;
		if (bad)
			discard_fragment();
	}

	// Destination alpha test against the primitive id texture
	if (PS_DATE == 3)
	{
		const float stencil_ceil = prim_id_tex.read(uint2(in.p.xy)).r;
		// Note prim_id == stencil_ceil will be the primitive that will update
		// the bad alpha value so we must keep it.
		if (float(prim_id) > stencil_ceil)
			discard_fragment();
	}

	float4 C = ps_color();

	if (PS_SHUFFLE)
	{
		const uchar4 denorm_c = uchar4(C);
		const uchar2 denorm_TA = uchar2(cb.ta * 255.5f);

		// Red/blue both come from either blue or red of the input
		C.rb = PS_READ_BA ? C.bb : C.rr;

		// Green/alpha: low 7 bits from the chosen channel, top bit from TA.y or TA.x
		// depending on whether the channel's own top bit is set
		const uchar chosen = PS_READ_BA ? denorm_c.a : denorm_c.g;
		C.ga = (chosen & 0x7F) | (chosen & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
	}

	// Must be done before alpha correction
	float alpha_blend;
	if (!SW_AD_TO_HW)
		alpha_blend = C.a / 128.f;
	else if (PS_DFMT == FMT_24)
		alpha_blend = 1.f;
	else
		alpha_blend = trunc(current_color.a * 255.5f) / 128.f;

	// Alpha correction for the destination format
	if (PS_DFMT == FMT_16)
	{
		// Alpha collapses to either 0 or 128; PS_FBA forces 128
		C.a = (PS_FBA) ? 128.f : step(128.f, C.a) * 128.f;
	}
	else if (PS_DFMT == FMT_32 && PS_FBA)
	{
		if (C.a < 128.f)
			C.a += 128.f;
	}

	// Get first primitive that will write a failing alpha value
	if (PS_DATE == 1 || PS_DATE == 2)
	{
		// PS_DATE == 1: DATM == 0, pixels with alpha equal to 1 will fail (128-255)
		// PS_DATE == 2: DATM == 1, pixels with alpha equal to 0 will fail (0-127)
		const bool fails = (PS_DATE == 1) ? (C.a > 127.5f) : (C.a < 127.5f);
		out.c0 = fails ? float(prim_id) : FLT_MAX;
		return out;
	}

	ps_blend(C, alpha_blend);
	ps_dither(C);
	// Color clamp/wrap needs to be done after sw blending and dithering
	ps_color_clamp_wrap(C);
	ps_fbmask(C);

	if (PS_COLOR0)
	{
		// Back to the normalized range expected by the render target
		out.c0 = C / 255.f;
		if (PS_ONLY_ALPHA)
			out.c0.rgb = 0;
	}
	if (PS_COLOR1)
		out.c1 = alpha_blend;
	if (PS_ZCLAMP)
		out.depth = min(in.p.z, cb.max_depth);

	return out;
}
};
// How the hardware pixel shader gets the current render target color.
// Compiled with the framebuffer-fetch variants only when FBFETCH_SUPPORT is nonzero.
#if FBFETCH_SUPPORT
// Minimal fragment function that takes the current value of color attachment 0 as its
// input and returns it doubled.
// NOTE(review): the name suggests the host builds this to probe for framebuffer fetch support — confirm.
fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]])
{
return in * 2;
}
// The render target is bound as a texture only when it is needed and framebuffer fetch is not used
constant bool NEEDS_RT_TEX = NEEDS_RT && !HAS_FBFETCH;
// The render target is read through framebuffer fetch when it is needed and framebuffer fetch is used
constant bool NEEDS_RT_FBF = NEEDS_RT && HAS_FBFETCH;
#else
// Without framebuffer fetch, the texture is the only way to read the render target
constant bool NEEDS_RT_TEX = NEEDS_RT;
#endif
// Fragment entry point for the hardware renderer.
// Builds a PSMain from the stage input and uniforms, hands it only the resources that the
// function constants enable, provides the current render target color (via framebuffer
// fetch, a texture read, or zero when it is not needed) and runs PSMain::ps_main.
fragment MainPSOut ps_main(
	MainPSIn in [[stage_in]],
	constant GSMTLMainPSUniform& cb [[buffer(GSMTLBufferIndexHWUniforms)]],
	sampler s [[sampler(0)]],
#if PRIMID_SUPPORT
	uint primid [[primitive_id, function_constant(NEEDS_PRIMID)]],
#endif
#if FBFETCH_SUPPORT
	float4 rt_fbf [[color(0), raster_order_group(0), function_constant(NEEDS_RT_FBF)]],
#endif
	texture2d<float> tex [[texture(GSMTLTextureIndexTex), function_constant(PS_TEX_IS_COLOR)]],
	depth2d<float> depth [[texture(GSMTLTextureIndexTex), function_constant(PS_TEX_IS_DEPTH)]],
	texture2d<float> palette [[texture(GSMTLTextureIndexPalette), function_constant(PS_HAS_PALETTE)]],
	texture2d<float> rt [[texture(GSMTLTextureIndexRenderTarget), function_constant(NEEDS_RT_TEX)]],
	texture2d<float> primidtex [[texture(GSMTLTextureIndexPrimIDs), function_constant(PS_PRIM_CHECKING_READ)]])
{
	PSMain main(in, cb);
	main.tex_sampler = s;

	// The color and depth textures share one texture slot; take whichever view applies
	if (!PS_TEX_IS_COLOR)
		main.tex_depth = depth;
	else
		main.tex = tex;

	if (PS_HAS_PALETTE)
		main.palette = palette;

	if (PS_PRIM_CHECKING_READ)
		main.prim_id_tex = primidtex;

#if PRIMID_SUPPORT
	if (NEEDS_PRIMID)
		main.prim_id = primid;
#endif

	// Current render target color: zero when unused, framebuffer fetch when available,
	// otherwise a read from the render target texture at this pixel
	if (!NEEDS_RT)
	{
		main.current_color = 0;
	}
#if FBFETCH_SUPPORT
	else if (HAS_FBFETCH)
	{
		main.current_color = rt_fbf;
	}
#endif
	else
	{
		main.current_color = rt.read(uint2(in.p.xy));
	}

	return main.ps_main();
}
// Compiled only when PRIMID_SUPPORT is nonzero.
#if PRIMID_SUPPORT
// Minimal fragment function that returns the primitive id it was invoked for.
// NOTE(review): presumably built by the host to check that [[primitive_id]] is usable — confirm.
fragment uint primid_test(uint id [[primitive_id]])
{
return id;
}
#endif
// MARK: Markers for detecting the Metal version a metallib was compiled against
// Each empty kernel below exists only when __METAL_VERSION__ is at least the stated value,
// so the set of marker functions present in a library tells which version it was built with.
// Present when __METAL_VERSION__ >= 210
#if __METAL_VERSION__ >= 210
kernel void metal_version_21() {}
#endif
// Present when __METAL_VERSION__ >= 220
#if __METAL_VERSION__ >= 220
kernel void metal_version_22() {}
#endif
// Present when __METAL_VERSION__ >= 230
#if __METAL_VERSION__ >= 230
kernel void metal_version_23() {}
#endif

View File

@ -32,8 +32,19 @@ HostDisplay::~HostDisplay() = default;
/// Returns the name of a render API as a static string.
///
/// The name is the enumerator's own identifier ("None", "D3D11", "Metal", "Vulkan",
/// "OpenGL", "OpenGLES"). Any value without a case below yields "Unknown".
/// The returned pointer refers to a string literal and must not be freed.
const char* HostDisplay::RenderAPIToString(RenderAPI api)
{
	// A switch keyed on the enumerator keeps every name tied to its value. The previous
	// index-based name table silently went out of sync once Metal was inserted in the
	// middle of the enum, and its unconditional return made this switch unreachable.
	switch (api)
	{
#define CASE(x) case RenderAPI::x: return #x
		CASE(None);
		CASE(D3D11);
		CASE(Metal);
		CASE(Vulkan);
		CASE(OpenGL);
		CASE(OpenGLES);
#undef CASE
		default:
			return "Unknown";
	}
}
bool HostDisplay::UsesLowerLeftOrigin() const
@ -122,6 +133,7 @@ std::string HostDisplay::GetFullscreenModeString(u32 width, u32 height, float re
#ifdef _WIN32
#include "Frontend/D3D11HostDisplay.h"
#endif
#include "GS/Renderers/Metal/GSMetalCPPAccessible.h"
std::unique_ptr<HostDisplay> HostDisplay::CreateDisplayForAPI(RenderAPI api)
{

View File

@ -46,6 +46,7 @@ public:
{
None,
D3D11,
Metal,
Vulkan,
OpenGL,
OpenGLES