Merge pull request #10754 from tellowkrinkle/Metal

VideoBackends: Add Metal backend
JMC47 2022-07-23 01:57:35 -04:00 committed by GitHub
commit 89c4fdeeea
58 changed files with 4318 additions and 55 deletions

View File

@@ -631,6 +631,10 @@ if(ENABLE_VULKAN)
target_link_libraries(core PUBLIC videovulkan)
endif()
if(APPLE)
target_link_libraries(core PUBLIC videometal)
endif()
if(USE_MGBA)
target_sources(core PRIVATE
HW/GBACore.cpp

View File

@@ -8,6 +8,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
add_subdirectory(D3D12)
endif()
if(APPLE)
add_subdirectory(Metal)
endif()
if(ENABLE_VULKAN)
add_subdirectory(Vulkan)
endif()

View File

@@ -111,6 +111,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter);
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames();
g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter);

View File

@@ -150,8 +150,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
D3D::context->DrawIndexed(num_indices, base_index, base_vertex);
}
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
D3D::stateman->SetComputeShader(static_cast<const DXShader*>(shader)->GetD3DComputeShader());
D3D::stateman->SyncComputeBindings();

View File

@@ -57,8 +57,8 @@ public:
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;
void SetFullscreen(bool enable_fullscreen) override;
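The signature change above (repeated for D3D12 below and implemented by the new Metal renderer) threads the threadgroup dimensions through DispatchComputeShader, since Metal supplies the threadgroup size at dispatch time rather than baking it into the shader. A hedged example of a call site under the new signature; the shader handle and image dimensions are illustrative, not from this commit:

// Dispatch an 8x8x1 threadgroup over a 640x528 image: 80x66 groups cover it.
const u32 image_w = 640, image_h = 528;
const u32 groupsize_x = 8, groupsize_y = 8;
const u32 groups_x = (image_w + groupsize_x - 1) / groupsize_x;  // 80
const u32 groups_y = (image_h + groupsize_y - 1) / groupsize_y;  // 66
g_renderer->DispatchComputeShader(copy_shader, groupsize_x, groupsize_y, 1,
                                  groups_x, groups_y, 1);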

View File

@@ -365,8 +365,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0);
}
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
SetRootSignatures();
SetDescriptorHeaps();

View File

@@ -69,8 +69,8 @@ public:
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

View File

@@ -86,6 +86,7 @@ void VideoBackend::FillBackendInfo()
g_Config.backend_info.bSupportsTextureQueryLevels = true;
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsSettingObjectNames = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
// We can only check texture support once we have a device.
if (g_dx_context)

View File

@@ -0,0 +1,40 @@
add_library(videometal
MRCHelpers.h
MTLBoundingBox.mm
MTLBoundingBox.h
MTLMain.mm
MTLObjectCache.h
MTLObjectCache.mm
MTLPerfQuery.mm
MTLPerfQuery.h
MTLPipeline.mm
MTLPipeline.h
MTLRenderer.mm
MTLRenderer.h
MTLShader.mm
MTLShader.h
MTLStateTracker.mm
MTLStateTracker.h
MTLTexture.mm
MTLTexture.h
MTLUtil.mm
MTLUtil.h
MTLVertexFormat.mm
MTLVertexFormat.h
MTLVertexManager.mm
MTLVertexManager.h
VideoBackend.h
)
find_library(METAL_LIBRARY Metal)
find_library(QUARTZCORE_LIBRARY QuartzCore)
target_link_libraries(videometal
PUBLIC
common
videocommon
PRIVATE
spirv_cross
${METAL_LIBRARY}
${QUARTZCORE_LIBRARY}
)

View File

@@ -0,0 +1,81 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// clang-format off
#ifndef __OBJC__
#error This header is for use with Objective-C++ only.
#endif
#if __has_feature(objc_arc)
#error This file is for manual reference counting! Compile without -fobjc-arc
#endif
// clang-format on
#pragma once
#include <cstddef>
#include <utility>
/// Managed Obj-C pointer
template <typename T>
class MRCOwned
{
T ptr;
MRCOwned(T ptr) : ptr(ptr) {}
public:
MRCOwned() : ptr(nullptr) {}
MRCOwned(std::nullptr_t) : ptr(nullptr) {}
MRCOwned(MRCOwned&& other) : ptr(other.ptr) { other.ptr = nullptr; }
MRCOwned(const MRCOwned& other) : ptr(other.ptr) { [ptr retain]; }
~MRCOwned()
{
if (ptr)
[ptr release];
}
operator T() const { return ptr; }
MRCOwned& operator=(const MRCOwned& other)
{
[other.ptr retain];
if (ptr)
[ptr release];
ptr = other.ptr;
return *this;
}
MRCOwned& operator=(MRCOwned&& other)
{
std::swap(ptr, other.ptr);
return *this;
}
void Reset()
{
[ptr release];
ptr = nullptr;
}
T Disown()
{
T tmp = ptr;
ptr = nullptr;
return tmp;
}
T Get() const { return ptr; }
static MRCOwned Transfer(T ptr) { return MRCOwned(ptr); }
static MRCOwned Retain(T ptr)
{
[ptr retain];
return MRCOwned(ptr);
}
};
/// Take ownership of an Obj-C pointer (equivalent to __bridge_transfer)
template <typename T>
static inline MRCOwned<T> MRCTransfer(T ptr)
{
return MRCOwned<T>::Transfer(ptr);
}
/// Retain an Obj-C pointer (equivalent to __bridge)
template <typename T>
static inline MRCOwned<T> MRCRetain(T ptr)
{
return MRCOwned<T>::Retain(ptr);
}
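MRCOwned is an RAII wrapper for manual Obj-C reference counting: MRCTransfer adopts an existing +1 reference (from alloc/new/copy-style methods), while MRCRetain adds a reference to a pointer the caller does not own. A minimal usage sketch, not part of the commit; the scratch buffer and its label are illustrative:

// Compile as Objective-C++ without -fobjc-arc, as MRCHelpers.h requires.
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include "VideoBackends/Metal/MRCHelpers.h"
static MRCOwned<id<MTLBuffer>> MakeScratchBuffer(id<MTLDevice> device)
{
  // newBufferWithLength: returns a +1 reference, so MRCTransfer adopts it;
  // the buffer is released when the MRCOwned goes out of scope.
  MRCOwned<id<MTLBuffer>> buffer =
      MRCTransfer([device newBufferWithLength:4096 options:MTLResourceStorageModeShared]);
  [buffer setLabel:@"Scratch"];  // operator T() lets it be messaged like a raw id
  return buffer;                 // moving out hands over ownership without extra retain/release
}
static MRCOwned<CAMetalLayer*> KeepLayer(CAMetalLayer* layer_owned_elsewhere)
{
  // The caller keeps its own reference, so MRCRetain bumps the retain count before storing.
  return MRCRetain(layer_owned_elsewhere);
}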

View File

@@ -0,0 +1,30 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/BoundingBox.h"
#include <Metal/Metal.h>
#include "VideoBackends/Metal/MRCHelpers.h"
namespace Metal
{
class BoundingBox final : public ::BoundingBox
{
public:
~BoundingBox() override;
bool Initialize() override;
protected:
std::vector<BBoxType> Read(u32 index, u32 length) override;
void Write(u32 index, const std::vector<BBoxType>& values) override;
private:
BBoxType* m_cpu_buffer_ptr;
MRCOwned<id<MTLFence>> m_download_fence;
MRCOwned<id<MTLFence>> m_upload_fence;
MRCOwned<id<MTLBuffer>> m_cpu_buffer;
MRCOwned<id<MTLBuffer>> m_gpu_buffer;
};
} // namespace Metal

View File

@@ -0,0 +1,72 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLBoundingBox.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
static constexpr size_t BUFFER_SIZE = sizeof(BBoxType) * NUM_BBOX_VALUES;
Metal::BoundingBox::~BoundingBox()
{
if (g_state_tracker)
g_state_tracker->SetBBoxBuffer(nullptr, nullptr, nullptr);
}
bool Metal::BoundingBox::Initialize()
{
const MTLResourceOptions gpu_options =
MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
const id<MTLDevice> dev = g_device;
m_upload_fence = MRCTransfer([dev newFence]);
[m_upload_fence setLabel:@"BBox Upload Fence"];
m_download_fence = MRCTransfer([dev newFence]);
[m_download_fence setLabel:@"BBox Download Fence"];
m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]);
[m_gpu_buffer setLabel:@"BBox Buffer"];
m_cpu_buffer_ptr = static_cast<BBoxType*>([m_gpu_buffer contents]);
g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence);
return true;
}
std::vector<BBoxType> Metal::BoundingBox::Read(u32 index, u32 length)
{
@autoreleasepool
{
g_state_tracker->EndRenderPass();
g_state_tracker->FlushEncoders();
g_state_tracker->WaitForFlushedEncoders();
return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length);
}
}
void Metal::BoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
{
const u32 size = values.size() * sizeof(BBoxType);
if (!g_state_tracker->HasUnflushedData() && !g_state_tracker->GPUBusy())
{
// We can just write directly to the buffer!
memcpy(m_cpu_buffer_ptr + index, values.data(), size);
}
else
{
@autoreleasepool
{
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Other, size,
StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, values.data(), size);
g_state_tracker->EndRenderPass();
id<MTLBlitCommandEncoder> upload = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder];
[upload setLabel:@"BBox Upload"];
[upload waitForFence:m_download_fence];
[upload copyFromBuffer:map.gpu_buffer
sourceOffset:map.gpu_offset
toBuffer:m_gpu_buffer
destinationOffset:index * sizeof(BBoxType)
size:size];
[upload updateFence:m_upload_fence];
[upload endEncoding];
}
}
}

View File

@@ -0,0 +1,166 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/VideoBackend.h"
#include <AppKit/AppKit.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include "Common/Common.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLPerfQuery.h"
#include "VideoBackends/Metal/MTLRenderer.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoBackends/Metal/MTLVertexManager.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
std::string Metal::VideoBackend::GetName() const
{
return NAME;
}
std::string Metal::VideoBackend::GetDisplayName() const
{
// i18n: Apple's Metal graphics API (https://developer.apple.com/metal/)
return _trans("Metal");
}
std::optional<std::string> Metal::VideoBackend::GetWarningMessage() const
{
if (Util::GetAdapterList().empty())
{
return _trans("No Metal-compatible GPUs were found. "
"Use the OpenGL backend or upgrade your computer/GPU");
}
return std::nullopt;
}
static bool WindowSystemTypeSupportsMetal(WindowSystemType type)
{
switch (type)
{
case WindowSystemType::MacOS:
return true;
default:
return false;
}
}
bool Metal::VideoBackend::Initialize(const WindowSystemInfo& wsi)
{
@autoreleasepool
{
if (!WindowSystemTypeSupportsMetal(wsi.type) || !wsi.render_surface)
{
PanicAlertFmt("Bad WindowSystemInfo for Metal renderer.");
return false;
}
auto devs = Util::GetAdapterList();
if (devs.empty())
{
PanicAlertFmt("No Metal GPUs detected.");
return false;
}
Util::PopulateBackendInfo(&g_Config);
Util::PopulateBackendInfoAdapters(&g_Config, devs);
// Since we haven't called InitializeShared yet, iAdapter may be out of range,
// so we have to check it ourselves.
size_t selected_adapter_index = static_cast<size_t>(g_Config.iAdapter);
if (selected_adapter_index >= devs.size())
{
WARN_LOG_FMT(VIDEO, "Metal adapter index out of range, selecting default adapter.");
selected_adapter_index = 0;
}
MRCOwned<id<MTLDevice>> adapter = std::move(devs[selected_adapter_index]);
Util::PopulateBackendInfoFeatures(&g_Config, adapter);
// With the backend information populated, we can now initialize videocommon.
InitializeShared();
MRCOwned<CAMetalLayer*> layer = MRCRetain(static_cast<CAMetalLayer*>(wsi.render_surface));
[layer setDevice:adapter];
if (Util::ToAbstract([layer pixelFormat]) == AbstractTextureFormat::Undefined)
[layer setPixelFormat:MTLPixelFormatBGRA8Unorm];
CGSize size = [layer bounds].size;
float scale = [layer contentsScale];
ObjectCache::Initialize(std::move(adapter));
g_state_tracker = std::make_unique<StateTracker>();
g_renderer = std::make_unique<Renderer>(std::move(layer), size.width * scale,
size.height * scale, scale);
g_vertex_manager = std::make_unique<VertexManager>();
g_perf_query = std::make_unique<PerfQuery>();
g_framebuffer_manager = std::make_unique<FramebufferManager>();
g_texture_cache = std::make_unique<TextureCacheBase>();
g_shader_cache = std::make_unique<VideoCommon::ShaderCache>();
if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() ||
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() ||
!g_texture_cache->Initialize())
{
PanicAlertFmt("Failed to initialize renderer classes");
Shutdown();
return false;
}
g_shader_cache->InitializeShaderCache();
return true;
}
}
void Metal::VideoBackend::Shutdown()
{
g_shader_cache->Shutdown();
g_renderer->Shutdown();
g_shader_cache.reset();
g_texture_cache.reset();
g_framebuffer_manager.reset();
g_perf_query.reset();
g_vertex_manager.reset();
g_renderer.reset();
g_state_tracker.reset();
ObjectCache::Shutdown();
ShutdownShared();
}
void Metal::VideoBackend::InitBackendInfo()
{
@autoreleasepool
{
Util::PopulateBackendInfo(&g_Config);
auto adapters = Util::GetAdapterList();
Util::PopulateBackendInfoAdapters(&g_Config, adapters);
if (!adapters.empty())
{
// Use the selected adapter, or the first to fill features.
size_t index = static_cast<size_t>(g_Config.iAdapter);
if (index >= adapters.size())
index = 0;
Util::PopulateBackendInfoFeatures(&g_Config, adapters[index]);
}
}
}
void Metal::VideoBackend::PrepareWindow(WindowSystemInfo& wsi)
{
if (wsi.type != WindowSystemType::MacOS)
return;
NSView* view = static_cast<NSView*>(wsi.render_surface);
CAMetalLayer* layer = [CAMetalLayer layer];
[view setWantsLayer:YES];
[view setLayer:layer];
wsi.render_surface = layer;
}
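Taken together, PrepareWindow installs a CAMetalLayer on the host NSView and Initialize binds a device to that layer, sizing the backbuffer from the layer bounds times contentsScale. A condensed standalone sketch of that handshake, assuming ARC and the system default device instead of Dolphin's adapter selection:

#include <AppKit/AppKit.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
// Hypothetical host-side setup mirroring PrepareWindow() plus the start of Initialize().
static CAMetalLayer* AttachMetalLayer(NSView* view)
{
  CAMetalLayer* layer = [CAMetalLayer layer];  // what PrepareWindow installs
  [view setWantsLayer:YES];
  [view setLayer:layer];
  [layer setDevice:MTLCreateSystemDefaultDevice()];  // Initialize() instead picks from Util::GetAdapterList()
  [layer setPixelFormat:MTLPixelFormatBGRA8Unorm];   // Dolphin only forces this when it can't map the existing format
  // The backbuffer dimensions are the layer bounds scaled by contentsScale,
  // matching the width/height handed to the Renderer constructor.
  const CGSize bounds = [layer bounds].size;
  const CGFloat scale = [layer contentsScale];
  [layer setDrawableSize:CGSizeMake(bounds.width * scale, bounds.height * scale)];
  return layer;
}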

View File

@@ -0,0 +1,106 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include <memory>
#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoCommon/RenderState.h"
struct AbstractPipelineConfig;
class AbstractPipeline;
namespace Metal
{
class Shader;
extern MRCOwned<id<MTLDevice>> g_device;
extern MRCOwned<id<MTLCommandQueue>> g_queue;
struct DepthStencilSelector
{
u8 value;
DepthStencilSelector() : value(0) {}
DepthStencilSelector(bool update_enable, enum CompareMode cmp)
: value(update_enable | (static_cast<u32>(cmp) << 1))
{
}
DepthStencilSelector(DepthState state)
: DepthStencilSelector(state.testenable ? state.updateenable : false,
state.testenable ? state.func : CompareMode::Always)
{
}
bool UpdateEnable() const { return value & 1; }
enum CompareMode CompareMode() const { return static_cast<enum CompareMode>(value >> 1); }
bool operator==(const DepthStencilSelector& other) { return value == other.value; }
bool operator!=(const DepthStencilSelector& other) { return !(*this == other); }
static constexpr size_t N_VALUES = 1 << 4;
};
struct SamplerSelector
{
u8 value;
SamplerSelector() : value(0) {}
SamplerSelector(SamplerState state)
{
value = (static_cast<u32>(state.tm0.min_filter.Value()) << 0) |
(static_cast<u32>(state.tm0.mag_filter.Value()) << 1) |
(static_cast<u32>(state.tm0.mipmap_filter.Value()) << 2) |
(static_cast<u32>(state.tm0.anisotropic_filtering) << 3);
value |= (static_cast<u32>(state.tm0.wrap_u.Value()) +
3 * static_cast<u32>(state.tm0.wrap_v.Value()))
<< 4;
}
FilterMode MinFilter() const { return static_cast<FilterMode>(value & 1); }
FilterMode MagFilter() const { return static_cast<FilterMode>((value >> 1) & 1); }
FilterMode MipFilter() const { return static_cast<FilterMode>((value >> 2) & 1); }
WrapMode WrapU() const { return static_cast<WrapMode>((value >> 4) % 3); }
WrapMode WrapV() const { return static_cast<WrapMode>((value >> 4) / 3); }
bool AnisotropicFiltering() const { return ((value >> 3) & 1); }
bool operator==(const SamplerSelector& other) { return value == other.value; }
bool operator!=(const SamplerSelector& other) { return !(*this == other); }
static constexpr size_t N_VALUES = (1 << 4) * 9;
};
class ObjectCache
{
ObjectCache();
public:
~ObjectCache();
static void Initialize(MRCOwned<id<MTLDevice>> device);
static void Shutdown();
id<MTLDepthStencilState> GetDepthStencil(DepthStencilSelector sel) { return m_dss[sel.value]; }
id<MTLSamplerState> GetSampler(SamplerSelector sel)
{
if (__builtin_expect(!m_samplers[sel.value], false))
m_samplers[sel.value] = CreateSampler(sel);
return m_samplers[sel.value];
}
id<MTLSamplerState> GetSampler(SamplerState state) { return GetSampler(SamplerSelector(state)); }
void ReloadSamplers();
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config);
void ShaderDestroyed(const Shader* shader);
private:
class Internal;
std::unique_ptr<Internal> m_internal;
MRCOwned<id<MTLSamplerState>> CreateSampler(SamplerSelector sel);
MRCOwned<id<MTLDepthStencilState>> m_dss[DepthStencilSelector::N_VALUES];
MRCOwned<id<MTLSamplerState>> m_samplers[SamplerSelector::N_VALUES];
};
extern std::unique_ptr<ObjectCache> g_object_cache;
} // namespace Metal
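SamplerSelector packs the whole sampler state into one byte: bits 0-2 hold the min/mag/mip filters, bit 3 the anisotropy flag, and the upper bits encode the two wrap modes as a base-3 pair, giving the 16 * 9 = 144 cache slots declared by N_VALUES. A worked encoding example; the enum numbering (Near=0/Linear=1, Clamp=0/Repeat=1/Mirror=2) is an assumption for illustration:

// min = Linear, mag = Near, mip = Linear, no anisotropy, wrap_u = Repeat, wrap_v = Mirror:
//   filter bits = 1 | (0 << 1) | (1 << 2) | (0 << 3)  = 0b0101 = 5
//   wrap pair   = 1 + 3 * 2                           = 7
//   value       = 5 | (7 << 4)                        = 117
// Decoding inverts the packing: WrapU() = (117 >> 4) % 3 = 1 (Repeat),
// WrapV() = (117 >> 4) / 3 = 2 (Mirror), MipFilter() = (117 >> 2) & 1 = 1 (Linear).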

View File

@@ -0,0 +1,500 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLObjectCache.h"
#include <map>
#include <mutex>
#include <optional>
#include "Common/Assert.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/Metal/MTLPipeline.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoBackends/Metal/MTLVertexFormat.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VertexShaderGen.h"
#include "VideoCommon/VideoConfig.h"
MRCOwned<id<MTLDevice>> Metal::g_device;
MRCOwned<id<MTLCommandQueue>> Metal::g_queue;
std::unique_ptr<Metal::ObjectCache> Metal::g_object_cache;
static void SetupDepthStencil(
MRCOwned<id<MTLDepthStencilState>> (&dss)[Metal::DepthStencilSelector::N_VALUES]);
Metal::ObjectCache::ObjectCache()
{
m_internal = std::make_unique<Internal>();
SetupDepthStencil(m_dss);
}
Metal::ObjectCache::~ObjectCache()
{
}
void Metal::ObjectCache::Initialize(MRCOwned<id<MTLDevice>> device)
{
g_device = std::move(device);
g_queue = MRCTransfer([g_device newCommandQueue]);
g_object_cache = std::unique_ptr<ObjectCache>(new ObjectCache);
}
void Metal::ObjectCache::Shutdown()
{
g_object_cache.reset();
g_queue = nullptr;
g_device = nullptr;
}
// MARK: Depth Stencil State
// clang-format off
static MTLCompareFunction Convert(CompareMode mode)
{
const bool invert_depth = !g_Config.backend_info.bSupportsReversedDepthRange;
switch (mode)
{
case CompareMode::Never: return MTLCompareFunctionNever;
case CompareMode::Less: return invert_depth ? MTLCompareFunctionGreater
: MTLCompareFunctionLess;
case CompareMode::Equal: return MTLCompareFunctionEqual;
case CompareMode::LEqual: return invert_depth ? MTLCompareFunctionGreaterEqual
: MTLCompareFunctionLessEqual;
case CompareMode::Greater: return invert_depth ? MTLCompareFunctionLess
: MTLCompareFunctionGreater;
case CompareMode::NEqual: return MTLCompareFunctionNotEqual;
case CompareMode::GEqual: return invert_depth ? MTLCompareFunctionLessEqual
: MTLCompareFunctionGreaterEqual;
case CompareMode::Always: return MTLCompareFunctionAlways;
}
}
static const char* to_string(MTLCompareFunction compare)
{
switch (compare)
{
case MTLCompareFunctionNever: return "Never";
case MTLCompareFunctionGreater: return "Greater";
case MTLCompareFunctionEqual: return "Equal";
case MTLCompareFunctionGreaterEqual: return "GEqual";
case MTLCompareFunctionLess: return "Less";
case MTLCompareFunctionNotEqual: return "NEqual";
case MTLCompareFunctionLessEqual: return "LEqual";
case MTLCompareFunctionAlways: return "Always";
}
}
// clang-format on
static void SetupDepthStencil(
MRCOwned<id<MTLDepthStencilState>> (&dss)[Metal::DepthStencilSelector::N_VALUES])
{
auto desc = MRCTransfer([MTLDepthStencilDescriptor new]);
Metal::DepthStencilSelector sel;
for (size_t i = 0; i < std::size(dss); ++i)
{
sel.value = i;
MTLCompareFunction mcompare = Convert(sel.CompareMode());
[desc setDepthWriteEnabled:sel.UpdateEnable()];
[desc setDepthCompareFunction:mcompare];
[desc setLabel:[NSString stringWithFormat:@"DSS %s%s", to_string(mcompare),
sel.UpdateEnable() ? "+Write" : ""]];
dss[i] = MRCTransfer([Metal::g_device newDepthStencilStateWithDescriptor:desc]);
}
}
// MARK: Samplers
// clang-format off
static MTLSamplerMinMagFilter ConvertMinMag(FilterMode filter)
{
switch (filter)
{
case FilterMode::Linear: return MTLSamplerMinMagFilterLinear;
case FilterMode::Near: return MTLSamplerMinMagFilterNearest;
}
}
static MTLSamplerMipFilter ConvertMip(FilterMode filter)
{
switch (filter)
{
case FilterMode::Linear: return MTLSamplerMipFilterLinear;
case FilterMode::Near: return MTLSamplerMipFilterNearest;
}
}
static MTLSamplerAddressMode Convert(WrapMode wrap)
{
switch (wrap)
{
case WrapMode::Clamp: return MTLSamplerAddressModeClampToEdge;
case WrapMode::Mirror: return MTLSamplerAddressModeMirrorRepeat;
case WrapMode::Repeat: return MTLSamplerAddressModeRepeat;
}
}
static const char* to_string(FilterMode filter)
{
switch (filter)
{
case FilterMode::Linear: return "Ln";
case FilterMode::Near: return "Pt";
}
}
static const char* to_string(WrapMode wrap)
{
switch (wrap)
{
case WrapMode::Clamp: return "C";
case WrapMode::Mirror: return "M";
case WrapMode::Repeat: return "R";
}
}
// clang-format on
MRCOwned<id<MTLSamplerState>> Metal::ObjectCache::CreateSampler(SamplerSelector sel)
{
@autoreleasepool
{
auto desc = MRCTransfer([MTLSamplerDescriptor new]);
[desc setMinFilter:ConvertMinMag(sel.MinFilter())];
[desc setMagFilter:ConvertMinMag(sel.MagFilter())];
[desc setMipFilter:ConvertMip(sel.MipFilter())];
[desc setSAddressMode:Convert(sel.WrapU())];
[desc setTAddressMode:Convert(sel.WrapV())];
[desc setMaxAnisotropy:1 << (sel.AnisotropicFiltering() ? g_ActiveConfig.iMaxAnisotropy : 0)];
[desc setLabel:MRCTransfer([[NSString alloc]
initWithFormat:@"%s%s%s %s%s%s", to_string(sel.MinFilter()),
to_string(sel.MagFilter()), to_string(sel.MipFilter()),
to_string(sel.WrapU()), to_string(sel.WrapV()),
sel.AnisotropicFiltering() ? "(AF)" : ""])];
return MRCTransfer([Metal::g_device newSamplerStateWithDescriptor:desc]);
}
}
void Metal::ObjectCache::ReloadSamplers()
{
for (auto& sampler : m_samplers)
sampler = nullptr;
}
// MARK: Pipelines
static MTLPrimitiveTopologyClass GetClass(PrimitiveType prim)
{
switch (prim)
{
case PrimitiveType::Points:
return MTLPrimitiveTopologyClassPoint;
case PrimitiveType::Lines:
return MTLPrimitiveTopologyClassLine;
case PrimitiveType::Triangles:
case PrimitiveType::TriangleStrip:
return MTLPrimitiveTopologyClassTriangle;
}
}
static MTLPrimitiveType Convert(PrimitiveType prim)
{
// clang-format off
switch (prim)
{
case PrimitiveType::Points: return MTLPrimitiveTypePoint;
case PrimitiveType::Lines: return MTLPrimitiveTypeLine;
case PrimitiveType::Triangles: return MTLPrimitiveTypeTriangle;
case PrimitiveType::TriangleStrip: return MTLPrimitiveTypeTriangleStrip;
}
// clang-format on
}
static MTLCullMode Convert(CullMode cull)
{
switch (cull)
{
case CullMode::None:
case CullMode::All: // Handled by disabling rasterization
return MTLCullModeNone;
case CullMode::Front:
return MTLCullModeFront;
case CullMode::Back:
return MTLCullModeBack;
}
}
static MTLBlendFactor Convert(DstBlendFactor factor, bool usedualsrc)
{
// clang-format off
switch (factor)
{
case DstBlendFactor::Zero: return MTLBlendFactorZero;
case DstBlendFactor::One: return MTLBlendFactorOne;
case DstBlendFactor::SrcClr: return MTLBlendFactorSourceColor;
case DstBlendFactor::InvSrcClr: return MTLBlendFactorOneMinusSourceColor;
case DstBlendFactor::SrcAlpha: return usedualsrc ? MTLBlendFactorSource1Alpha
: MTLBlendFactorSourceAlpha;
case DstBlendFactor::InvSrcAlpha: return usedualsrc ? MTLBlendFactorOneMinusSource1Alpha
: MTLBlendFactorOneMinusSourceAlpha;
case DstBlendFactor::DstAlpha: return MTLBlendFactorDestinationAlpha;
case DstBlendFactor::InvDstAlpha: return MTLBlendFactorOneMinusDestinationAlpha;
}
// clang-format on
}
static MTLBlendFactor Convert(SrcBlendFactor factor, bool usedualsrc)
{
// clang-format off
switch (factor)
{
case SrcBlendFactor::Zero: return MTLBlendFactorZero;
case SrcBlendFactor::One: return MTLBlendFactorOne;
case SrcBlendFactor::DstClr: return MTLBlendFactorDestinationColor;
case SrcBlendFactor::InvDstClr: return MTLBlendFactorOneMinusDestinationColor;
case SrcBlendFactor::SrcAlpha: return usedualsrc ? MTLBlendFactorSource1Alpha
: MTLBlendFactorSourceAlpha;
case SrcBlendFactor::InvSrcAlpha: return usedualsrc ? MTLBlendFactorOneMinusSource1Alpha
: MTLBlendFactorOneMinusSourceAlpha;
case SrcBlendFactor::DstAlpha: return MTLBlendFactorDestinationAlpha;
case SrcBlendFactor::InvDstAlpha: return MTLBlendFactorOneMinusDestinationAlpha;
}
// clang-format on
}
class Metal::ObjectCache::Internal
{
public:
using StoredPipeline = std::pair<MRCOwned<id<MTLRenderPipelineState>>, PipelineReflection>;
/// Holds only the things that are actually used in a Metal pipeline
struct PipelineID
{
struct VertexAttribute
{
// Just hold the things that might differ while using the same shader
// (Really only a thing for ubershaders)
u8 offset : 6;
u8 components : 2;
VertexAttribute() = default;
explicit VertexAttribute(AttributeFormat format)
: offset(format.offset), components(format.components - 1)
{
if (!format.enable)
offset = 0x3F; // Set it to something unlikely
}
};
template <size_t N>
static void CopyAll(std::array<VertexAttribute, N>& output, const AttributeFormat (&input)[N])
{
for (size_t i = 0; i < N; ++i)
output[i] = VertexAttribute(input[i]);
}
PipelineID(const AbstractPipelineConfig& cfg)
{
memset(this, 0, sizeof(*this));
if (const NativeVertexFormat* v = cfg.vertex_format)
{
const PortableVertexDeclaration& decl = v->GetVertexDeclaration();
v_stride = v->GetVertexStride();
v_position = VertexAttribute(decl.position);
CopyAll(v_normals, decl.normals);
CopyAll(v_colors, decl.colors);
CopyAll(v_texcoords, decl.texcoords);
v_posmtx = VertexAttribute(decl.posmtx);
}
vertex_shader = static_cast<const Shader*>(cfg.vertex_shader);
fragment_shader = static_cast<const Shader*>(cfg.pixel_shader);
framebuffer.color_texture_format = cfg.framebuffer_state.color_texture_format.Value();
framebuffer.depth_texture_format = cfg.framebuffer_state.depth_texture_format.Value();
framebuffer.samples = cfg.framebuffer_state.samples.Value();
blend.colorupdate = cfg.blending_state.colorupdate.Value();
blend.alphaupdate = cfg.blending_state.alphaupdate.Value();
if (cfg.blending_state.blendenable)
{
// clang-format off
blend.blendenable = true;
blend.usedualsrc = cfg.blending_state.usedualsrc.Value();
blend.srcfactor = cfg.blending_state.srcfactor.Value();
blend.dstfactor = cfg.blending_state.dstfactor.Value();
blend.srcfactoralpha = cfg.blending_state.srcfactoralpha.Value();
blend.dstfactoralpha = cfg.blending_state.dstfactoralpha.Value();
blend.subtract = cfg.blending_state.subtract.Value();
blend.subtractAlpha = cfg.blending_state.subtractAlpha.Value();
// clang-format on
}
// Throw extras in bits we don't otherwise use
if (cfg.rasterization_state.cullmode == CullMode::All)
blend.hex |= 1 << 29;
if (cfg.rasterization_state.primitive == PrimitiveType::Points)
blend.hex |= 1 << 30;
else if (cfg.rasterization_state.primitive == PrimitiveType::Lines)
blend.hex |= 1 << 31;
}
PipelineID() { memset(this, 0, sizeof(*this)); }
PipelineID(const PipelineID& other) { memcpy(this, &other, sizeof(*this)); }
PipelineID& operator=(const PipelineID& other)
{
memcpy(this, &other, sizeof(*this));
return *this;
}
bool operator<(const PipelineID& other) const
{
return memcmp(this, &other, sizeof(*this)) < 0;
}
bool operator==(const PipelineID& other) const
{
return memcmp(this, &other, sizeof(*this)) == 0;
}
u8 v_stride;
VertexAttribute v_position;
std::array<VertexAttribute, 3> v_normals;
std::array<VertexAttribute, 2> v_colors;
std::array<VertexAttribute, 8> v_texcoords;
VertexAttribute v_posmtx;
const Shader* vertex_shader;
const Shader* fragment_shader;
BlendingState blend;
FramebufferState framebuffer;
};
std::mutex m_mtx;
std::condition_variable m_cv;
std::map<PipelineID, StoredPipeline> m_pipelines;
std::map<const Shader*, std::vector<PipelineID>> m_shaders;
std::array<u32, 3> m_pipeline_counter;
StoredPipeline CreatePipeline(const AbstractPipelineConfig& config)
{
@autoreleasepool
{
ASSERT(!config.geometry_shader);
auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[desc setVertexFunction:static_cast<const Shader*>(config.vertex_shader)->GetShader()];
[desc setFragmentFunction:static_cast<const Shader*>(config.pixel_shader)->GetShader()];
if (config.usage == AbstractPipelineUsage::GX)
{
if ([[[desc vertexFunction] label] containsString:@"Uber"])
[desc
setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]];
else
[desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]];
}
else
{
[desc setLabel:[NSString stringWithFormat:@"Utility Pipeline %d", m_pipeline_counter[2]++]];
}
if (config.vertex_format)
[desc setVertexDescriptor:static_cast<const VertexFormat*>(config.vertex_format)->Get()];
RasterizationState rs = config.rasterization_state;
[desc setInputPrimitiveTopology:GetClass(rs.primitive)];
if (rs.cullmode == CullMode::All)
[desc setRasterizationEnabled:NO];
MTLRenderPipelineColorAttachmentDescriptor* color0 =
[[desc colorAttachments] objectAtIndexedSubscript:0];
BlendingState bs = config.blending_state;
MTLColorWriteMask mask = MTLColorWriteMaskNone;
if (bs.colorupdate)
mask |= MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue;
if (bs.alphaupdate)
mask |= MTLColorWriteMaskAlpha;
[color0 setWriteMask:mask];
if (bs.blendenable)
{
// clang-format off
[color0 setBlendingEnabled:YES];
[color0 setSourceRGBBlendFactor: Convert(bs.srcfactor, bs.usedualsrc)];
[color0 setSourceAlphaBlendFactor: Convert(bs.srcfactoralpha, bs.usedualsrc)];
[color0 setDestinationRGBBlendFactor: Convert(bs.dstfactor, bs.usedualsrc)];
[color0 setDestinationAlphaBlendFactor:Convert(bs.dstfactoralpha, bs.usedualsrc)];
[color0 setRgbBlendOperation: bs.subtract ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd];
[color0 setAlphaBlendOperation:bs.subtractAlpha ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd];
// clang-format on
}
FramebufferState fs = config.framebuffer_state;
[desc setRasterSampleCount:fs.samples];
[color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)];
[desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)];
if (Util::HasStencil(fs.depth_texture_format))
[desc setStencilAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)];
NSError* err = nullptr;
MTLRenderPipelineReflection* reflection = nullptr;
id<MTLRenderPipelineState> pipe =
[g_device newRenderPipelineStateWithDescriptor:desc
options:MTLPipelineOptionArgumentInfo
reflection:&reflection
error:&err];
if (err)
{
PanicAlertFmt("Failed to compile pipeline for {} and {}: {}",
[[[desc vertexFunction] label] UTF8String],
[[[desc fragmentFunction] label] UTF8String],
[[err localizedDescription] UTF8String]);
return std::make_pair(nullptr, PipelineReflection());
}
return std::make_pair(MRCTransfer(pipe), PipelineReflection(reflection));
}
}
StoredPipeline GetOrCreatePipeline(const AbstractPipelineConfig& config)
{
std::unique_lock<std::mutex> lock(m_mtx);
PipelineID pid(config);
auto it = m_pipelines.find(pid);
if (it != m_pipelines.end())
{
while (!it->second.first && !it->second.second.textures)
m_cv.wait(lock); // Wait for whoever's already compiling this
return it->second;
}
// Reserve the spot now, so other threads know we're making it
it = m_pipelines.insert({pid, {nullptr, PipelineReflection()}}).first;
lock.unlock();
StoredPipeline pipe = CreatePipeline(config);
lock.lock();
if (pipe.first)
it->second = pipe;
else
it->second.second.textures = 1; // Abuse this as a "failed to create pipeline" flag
m_shaders[pid.vertex_shader].push_back(pid);
m_shaders[pid.fragment_shader].push_back(pid);
lock.unlock();
m_cv.notify_all(); // Wake up anyone who might be waiting
return pipe;
}
void ShaderDestroyed(const Shader* shader)
{
std::lock_guard<std::mutex> lock(m_mtx);
auto it = m_shaders.find(shader);
if (it == m_shaders.end())
return;
// It's unlikely, but if a shader is destroyed, a new one could be made with the same address
// (Also, we know it won't be used anymore, so there's no reason to keep these around)
for (const PipelineID& pid : it->second)
m_pipelines.erase(pid);
m_shaders.erase(it);
}
};
std::unique_ptr<AbstractPipeline>
Metal::ObjectCache::CreatePipeline(const AbstractPipelineConfig& config)
{
Internal::StoredPipeline pipeline = m_internal->GetOrCreatePipeline(config);
if (!pipeline.first)
return nullptr;
return std::make_unique<Pipeline>(
std::move(pipeline.first), pipeline.second, Convert(config.rasterization_state.primitive),
Convert(config.rasterization_state.cullmode), config.depth_state, config.usage);
}
void Metal::ObjectCache::ShaderDestroyed(const Shader* shader)
{
m_internal->ShaderDestroyed(shader);
}
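GetOrCreatePipeline above follows a reserve-then-compile pattern: the map slot is inserted while the lock is held so other threads can see a compile is in flight, the expensive newRenderPipelineStateWithDescriptor call runs with the lock released, and waiters are woken once the result (or the textures = 1 failure marker) is published. A generic sketch of the same pattern with hypothetical names and no Metal dependency:

#include <condition_variable>
#include <map>
#include <mutex>
#include <optional>
// Hypothetical compile-once cache in the style of GetOrCreatePipeline.
template <typename Key, typename Value>
class CompileOnceCache
{
public:
  Value GetOrCreate(const Key& key, Value (*compile)(const Key&))
  {
    std::unique_lock<std::mutex> lock(m_mtx);
    auto it = m_entries.find(key);
    if (it != m_entries.end())
    {
      while (!it->second.has_value())
        m_cv.wait(lock);  // someone else reserved this slot; wait for them to publish
      return *it->second;
    }
    // Reserve the slot under the lock so other threads know a compile is in flight.
    it = m_entries.emplace(key, std::nullopt).first;
    lock.unlock();
    Value value = compile(key);  // expensive work runs without the lock held
    lock.lock();
    it->second = std::move(value);  // std::map iterators stay valid, so no second lookup
    lock.unlock();
    m_cv.notify_all();  // wake anyone waiting on this slot
    return *it->second;
  }
private:
  std::mutex m_mtx;
  std::condition_variable m_cv;
  std::map<Key, std::optional<Value>> m_entries;
};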

View File

@@ -0,0 +1,34 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
#include "VideoCommon/PerfQueryBase.h"
namespace Metal
{
class PerfQuery final : public PerfQueryBase
{
public:
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
/// Notify PerfQuery of a new pending encoder
/// One call to ReturnResults should be made for every call to IncCount
void IncCount() { m_query_count.fetch_add(1, std::memory_order_relaxed); }
/// May be called from any thread
void ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, u32 query_id);
private:
u32 m_current_query = 0;
std::mutex m_results_mtx;
std::condition_variable m_cv;
};
} // namespace Metal
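The comments above describe a simple counter protocol: each encoder with outstanding visibility results bumps m_query_count through IncCount, and exactly one matching ReturnResults call (typically from a Metal completion handler, hence "any thread") feeds the counts back and decrements it, which is what IsFlushed and FlushResults wait on. A hedged sketch of such a call site; the real one lives in MTLStateTracker, outside this excerpt, and the buffer lifetime is an assumption noted below:

#include <vector>
#include <Metal/Metal.h>
#include "VideoBackends/Metal/MTLPerfQuery.h"
// Illustrative pairing only: `visibility` is assumed to be the render pass's
// visibility result buffer and to stay alive until the command buffer completes.
static void FinishQueryEncoder(Metal::PerfQuery* query, id<MTLCommandBuffer> cmdbuf,
                               id<MTLBuffer> visibility, std::vector<PerfQueryGroup> groups,
                               u32 query_id)
{
  query->IncCount();  // one pending encoder...
  [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer>) {
    // ...balanced by exactly one ReturnResults, invoked from Metal's completion thread.
    query->ReturnResults(static_cast<const u64*>([visibility contents]),
                         groups.data(), groups.size(), query_id);
  }];
}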

View File

@@ -0,0 +1,90 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLPerfQuery.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
g_state_tracker->EnablePerfQuery(type, m_current_query);
}
void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
{
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
g_state_tracker->DisablePerfQuery();
}
void Metal::PerfQuery::ResetQuery()
{
std::lock_guard<std::mutex> lock(m_results_mtx);
m_current_query++;
for (std::atomic<u32>& result : m_results)
result.store(0, std::memory_order_relaxed);
}
u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
u32 result = 0;
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
{
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
}
else if (type == PQ_BLEND_INPUT)
{
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
}
else if (type == PQ_EFB_COPY_CLOCKS)
{
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
}
return result;
}
void Metal::PerfQuery::FlushResults()
{
if (IsFlushed())
return;
// There's a possibility that some active performance queries are unflushed
g_state_tracker->FlushEncoders();
std::unique_lock<std::mutex> lock(m_results_mtx);
while (!IsFlushed())
m_cv.wait(lock);
}
bool Metal::PerfQuery::IsFlushed() const
{
return m_query_count.load(std::memory_order_acquire) == 0;
}
void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
u32 query_id)
{
{
std::lock_guard<std::mutex> lock(m_results_mtx);
if (m_current_query == query_id)
{
for (size_t i = 0; i < count; ++i)
{
u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
native_res_result /= g_ActiveConfig.iMultisamples;
m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
}
}
m_query_count.fetch_sub(1, std::memory_order_release);
}
m_cv.notify_one();
}
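ReturnResults rescales the raw counts so queries report results at native EFB resolution regardless of the internal resolution and MSAA settings. A worked example with illustrative numbers:

// The EFB is 640x528. At 2x internal resolution (target 1280x1056) with 4x MSAA,
// a raw visibility count of 1'000'000 covered samples becomes:
//   1'000'000 * (640 * 528) / (1280 * 1056) = 250'000   // undo the resolution scaling
//   250'000 / 4                             = 62'500    // undo iMultisamples
// and 62'500 is what gets accumulated into m_results[groups[i]].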

View File

@@ -0,0 +1,71 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLShader.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
namespace Metal
{
struct PipelineReflection
{
u32 textures = 0;
u32 samplers = 0;
u32 vertex_buffers = 0;
u32 fragment_buffers = 0;
PipelineReflection() = default;
explicit PipelineReflection(MTLRenderPipelineReflection* reflection);
};
class Pipeline final : public AbstractPipeline
{
public:
explicit Pipeline(MRCOwned<id<MTLRenderPipelineState>> pipeline,
const PipelineReflection& reflection, MTLPrimitiveType prim, MTLCullMode cull,
DepthState depth, AbstractPipelineUsage usage);
id<MTLRenderPipelineState> Get() const { return m_pipeline; }
MTLPrimitiveType Prim() const { return m_prim; }
MTLCullMode Cull() const { return m_cull; }
DepthStencilSelector DepthStencil() const { return m_depth_stencil; }
AbstractPipelineUsage Usage() const { return m_usage; }
u32 GetTextures() const { return m_reflection.textures; }
u32 GetSamplers() const { return m_reflection.samplers; }
u32 GetVertexBuffers() const { return m_reflection.vertex_buffers; }
u32 GetFragmentBuffers() const { return m_reflection.fragment_buffers; }
bool UsesVertexBuffer(u32 index) const { return m_reflection.vertex_buffers & (1 << index); }
bool UsesFragmentBuffer(u32 index) const { return m_reflection.fragment_buffers & (1 << index); }
private:
MRCOwned<id<MTLRenderPipelineState>> m_pipeline;
MTLPrimitiveType m_prim;
MTLCullMode m_cull;
DepthStencilSelector m_depth_stencil;
AbstractPipelineUsage m_usage;
PipelineReflection m_reflection;
};
class ComputePipeline : public Shader
{
public:
explicit ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection,
std::string msl, MRCOwned<id<MTLFunction>> shader,
MRCOwned<id<MTLComputePipelineState>> pipeline);
id<MTLComputePipelineState> GetComputePipeline() const { return m_compute_pipeline; }
bool UsesTexture(u32 index) const { return m_textures & (1 << index); }
bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); }
private:
MRCOwned<id<MTLComputePipelineState>> m_compute_pipeline;
u32 m_textures = 0;
u32 m_buffers = 0;
};
} // namespace Metal

View File

@@ -0,0 +1,70 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLPipeline.h"
#include "Common/MsgHandler.h"
static void MarkAsUsed(u32* list, u32 start, u32 length)
{
for (u32 i = start; i < start + length; ++i)
*list |= 1 << i;
}
static void GetArguments(NSArray<MTLArgument*>* arguments, u32* textures, u32* samplers,
u32* buffers)
{
for (MTLArgument* argument in arguments)
{
const u32 idx = [argument index];
const u32 length = [argument arrayLength];
if (idx + length > 32)
{
PanicAlertFmt("Making a MTLPipeline with high argument index {:d}..<{:d} for {:s}", //
idx, idx + length, [[argument name] UTF8String]);
continue;
}
switch ([argument type])
{
case MTLArgumentTypeTexture:
if (textures)
MarkAsUsed(textures, idx, length);
else
PanicAlertFmt("Vertex function wants a texture!");
break;
case MTLArgumentTypeSampler:
if (samplers)
MarkAsUsed(samplers, idx, length);
else
PanicAlertFmt("Vertex function wants a sampler!");
break;
case MTLArgumentTypeBuffer:
MarkAsUsed(buffers, idx, length);
break;
default:
break;
}
}
}
Metal::PipelineReflection::PipelineReflection(MTLRenderPipelineReflection* reflection)
{
GetArguments([reflection vertexArguments], nullptr, nullptr, &vertex_buffers);
GetArguments([reflection fragmentArguments], &textures, &samplers, &fragment_buffers);
}
Metal::Pipeline::Pipeline(MRCOwned<id<MTLRenderPipelineState>> pipeline,
const PipelineReflection& reflection, MTLPrimitiveType prim,
MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage)
: m_pipeline(std::move(pipeline)), m_prim(prim), m_cull(cull), m_depth_stencil(depth),
m_usage(usage), m_reflection(reflection)
{
}
Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection,
std::string msl, MRCOwned<id<MTLFunction>> shader,
MRCOwned<id<MTLComputePipelineState>> pipeline)
: Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline))
{
GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers);
}
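The reflection data is reduced to per-stage bitmasks with one bit per binding slot. A worked example with illustrative values:

// A fragment argument reported at buffer index 2 with arrayLength 3 calls
// MarkAsUsed(&fragment_buffers, 2, 3), setting bits 2..4:
//   fragment_buffers |= (1 << 2) | (1 << 3) | (1 << 4);   // == 0b11100 == 0x1C
// Pipeline::UsesFragmentBuffer(3) then simply tests bit 3 of 0x1C, so callers can
// skip binding slots the shader never touches. Indices at or above 32 are rejected
// up front because the masks are 32-bit.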

View File

@@ -0,0 +1,90 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include "VideoCommon/RenderBase.h"
#include "VideoBackends/Metal/MRCHelpers.h"
namespace Metal
{
class Framebuffer;
class Texture;
class Renderer final : public ::Renderer
{
public:
Renderer(MRCOwned<CAMetalLayer*> layer, int width, int height, float layer_scale);
~Renderer() override;
bool IsHeadless() const override;
bool Initialize() override;
std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config,
std::string_view name) override;
std::unique_ptr<AbstractStagingTexture>
CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override;
std::unique_ptr<AbstractFramebuffer>
CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override;
std::unique_ptr<AbstractShader> CreateShaderFromSource(ShaderStage stage, std::string_view source,
std::string_view name) override;
std::unique_ptr<AbstractShader> CreateShaderFromBinary(ShaderStage stage, const void* data,
size_t length,
std::string_view name) override;
std::unique_ptr<AbstractShader> CreateShaderFromMSL(ShaderStage stage, std::string msl,
std::string_view glsl, std::string_view name);
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
std::unique_ptr<AbstractPipeline> CreatePipeline(const AbstractPipelineConfig& config,
const void* cache_data = nullptr,
size_t cache_data_length = 0) override;
void Flush() override;
void WaitForGPUIdle() override;
void OnConfigChanged(u32 bits) override;
void ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable, bool alpha_enable,
bool z_enable, u32 color, u32 z) override;
void SetPipeline(const AbstractPipeline* pipeline) override;
void SetFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override;
void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {},
float depth_value = 0.0f) override;
void SetScissorRect(const MathUtil::Rectangle<int>& rc) override;
void SetTexture(u32 index, const AbstractTexture* texture) override;
void SetSamplerState(u32 index, const SamplerState& state) override;
void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override;
void UnbindTexture(const AbstractTexture* texture) override;
void SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;
protected:
std::unique_ptr<::BoundingBox> CreateBoundingBox() const override;
private:
MRCOwned<CAMetalLayer*> m_layer;
MRCOwned<id<CAMetalDrawable>> m_drawable;
std::unique_ptr<Texture> m_bb_texture;
std::unique_ptr<Framebuffer> m_backbuffer;
u32 m_texture_counter = 0;
u32 m_staging_texture_counter = 0;
std::array<u32, 4> m_shader_counter = {};
void CheckForSurfaceChange();
void CheckForSurfaceResize();
void SetupSurface();
};
} // namespace Metal

View File

@@ -0,0 +1,502 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLRenderer.h"
#include "VideoBackends/Metal/MTLBoundingBox.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLPipeline.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
#include "VideoBackends/Metal/MTLTexture.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoBackends/Metal/MTLVertexFormat.h"
#include "VideoBackends/Metal/MTLVertexManager.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/VideoBackendBase.h"
Metal::Renderer::Renderer(MRCOwned<CAMetalLayer*> layer, int width, int height, float layer_scale)
: ::Renderer(width, height, layer_scale, Util::ToAbstract([layer pixelFormat])),
m_layer(std::move(layer))
{
UpdateActiveConfig();
}
Metal::Renderer::~Renderer() = default;
bool Metal::Renderer::IsHeadless() const
{
return m_layer == nullptr;
}
bool Metal::Renderer::Initialize()
{
if (!::Renderer::Initialize())
return false;
SetupSurface();
g_state_tracker->FlushEncoders();
return true;
}
// MARK: Texture Creation
std::unique_ptr<AbstractTexture> Metal::Renderer::CreateTexture(const TextureConfig& config,
std::string_view name)
{
@autoreleasepool
{
MRCOwned<MTLTextureDescriptor*> desc = MRCTransfer([MTLTextureDescriptor new]);
[desc setTextureType:config.samples > 1 ? MTLTextureType2DMultisampleArray :
MTLTextureType2DArray];
[desc setPixelFormat:Util::FromAbstract(config.format)];
[desc setWidth:config.width];
[desc setHeight:config.height];
[desc setMipmapLevelCount:config.levels];
[desc setArrayLength:config.layers];
[desc setSampleCount:config.samples];
[desc setStorageMode:MTLStorageModePrivate];
MTLTextureUsage usage = MTLTextureUsageShaderRead;
if (config.IsRenderTarget())
usage |= MTLTextureUsageRenderTarget;
if (config.IsComputeImage())
usage |= MTLTextureUsageShaderWrite;
[desc setUsage:usage];
id<MTLTexture> texture = [g_device newTextureWithDescriptor:desc];
if (!texture)
return nullptr;
if (name.empty())
[texture setLabel:[NSString stringWithFormat:@"Texture %d", m_texture_counter++]];
else
[texture setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data()
length:name.size()
encoding:NSUTF8StringEncoding])];
return std::make_unique<Texture>(MRCTransfer(texture), config);
}
}
std::unique_ptr<AbstractStagingTexture>
Metal::Renderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config)
{
@autoreleasepool
{
const size_t stride = config.GetStride();
const size_t buffer_size = stride * static_cast<size_t>(config.height);
MTLResourceOptions options = MTLStorageModeShared;
if (type == StagingTextureType::Upload)
options |= MTLResourceCPUCacheModeWriteCombined;
id<MTLBuffer> buffer = [g_device newBufferWithLength:buffer_size options:options];
if (!buffer)
return nullptr;
[buffer
setLabel:[NSString stringWithFormat:@"Staging Texture %d", m_staging_texture_counter++]];
return std::make_unique<StagingTexture>(MRCTransfer(buffer), type, config);
}
}
std::unique_ptr<AbstractFramebuffer>
Metal::Renderer::CreateFramebuffer(AbstractTexture* color_attachment,
AbstractTexture* depth_attachment)
{
AbstractTexture* const either_attachment = color_attachment ? color_attachment : depth_attachment;
return std::make_unique<Framebuffer>(
color_attachment, depth_attachment, either_attachment->GetWidth(),
either_attachment->GetHeight(), either_attachment->GetLayers(),
either_attachment->GetSamples());
}
// MARK: Pipeline Creation
std::unique_ptr<AbstractShader> Metal::Renderer::CreateShaderFromSource(ShaderStage stage,
std::string_view source,
std::string_view name)
{
std::optional<std::string> msl = Util::TranslateShaderToMSL(stage, source);
if (!msl.has_value())
{
PanicAlertFmt("Failed to convert shader {} to MSL", name);
return nullptr;
}
return CreateShaderFromMSL(stage, std::move(*msl), source, name);
}
std::unique_ptr<AbstractShader> Metal::Renderer::CreateShaderFromBinary(ShaderStage stage,
const void* data,
size_t length,
std::string_view name)
{
return CreateShaderFromMSL(stage, std::string(static_cast<const char*>(data), length), {}, name);
}
// clang-format off
static const char* StageFilename(ShaderStage stage)
{
switch (stage)
{
case ShaderStage::Vertex: return "vs";
case ShaderStage::Geometry: return "gs";
case ShaderStage::Pixel: return "ps";
case ShaderStage::Compute: return "cs";
}
}
static NSString* GenericShaderName(ShaderStage stage)
{
switch (stage)
{
case ShaderStage::Vertex: return @"Vertex shader %d";
case ShaderStage::Geometry: return @"Geometry shader %d";
case ShaderStage::Pixel: return @"Pixel shader %d";
case ShaderStage::Compute: return @"Compute shader %d";
}
}
// clang-format on
std::unique_ptr<AbstractShader> Metal::Renderer::CreateShaderFromMSL(ShaderStage stage,
std::string msl,
std::string_view glsl,
std::string_view name)
{
@autoreleasepool
{
NSError* err = nullptr;
auto DumpBadShader = [&](std::string_view msg) {
static int counter = 0;
std::string filename = VideoBackendBase::BadShaderFilename(StageFilename(stage), counter++);
std::ofstream stream(filename);
if (stream.good())
{
stream << msl << std::endl;
stream << "/*" << std::endl;
stream << msg << std::endl;
stream << "Error:" << std::endl;
stream << [[err localizedDescription] UTF8String] << std::endl;
if (!glsl.empty())
{
stream << "Original GLSL:" << std::endl;
stream << glsl << std::endl;
}
else
{
stream << "Shader was created with cached MSL so no GLSL is available." << std::endl;
}
}
stream << std::endl;
stream << "Dolphin Version: " << Common::GetScmRevStr() << std::endl;
stream << "Video Backend: " << g_video_backend->GetDisplayName() << std::endl;
stream << "*/" << std::endl;
stream.close();
PanicAlertFmt("{} (written to {})\n", msg, filename);
};
auto lib = MRCTransfer([g_device newLibraryWithSource:[NSString stringWithUTF8String:msl.data()]
options:nil
error:&err]);
if (err)
{
DumpBadShader(fmt::format("Failed to compile {}", name));
return nullptr;
}
auto fn = MRCTransfer([lib newFunctionWithName:@"main0"]);
if (!fn)
{
DumpBadShader(fmt::format("Shader {} is missing its main0 function", name));
return nullptr;
}
if (!name.empty())
[fn setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data()
length:name.size()
encoding:NSUTF8StringEncoding])];
else
[fn setLabel:[NSString stringWithFormat:GenericShaderName(stage),
m_shader_counter[static_cast<u32>(stage)]++]];
[lib setLabel:[fn label]];
if (stage == ShaderStage::Compute)
{
MTLComputePipelineReflection* reflection = nullptr;
auto desc = [MTLComputePipelineDescriptor new];
[desc setComputeFunction:fn];
[desc setLabel:[fn label]];
MRCOwned<id<MTLComputePipelineState>> pipeline =
MRCTransfer([g_device newComputePipelineStateWithDescriptor:desc
options:MTLPipelineOptionArgumentInfo
reflection:&reflection
error:&err]);
if (err)
{
DumpBadShader(fmt::format("Failed to compile compute pipeline {}", name));
return nullptr;
}
return std::make_unique<ComputePipeline>(stage, reflection, std::move(msl), std::move(fn),
std::move(pipeline));
}
return std::make_unique<Shader>(stage, std::move(msl), std::move(fn));
}
}
std::unique_ptr<NativeVertexFormat>
Metal::Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
@autoreleasepool
{
return std::make_unique<VertexFormat>(vtx_decl);
}
}
std::unique_ptr<AbstractPipeline>
Metal::Renderer::CreatePipeline(const AbstractPipelineConfig& config, const void* cache_data,
size_t cache_data_length)
{
return g_object_cache->CreatePipeline(config);
}
void Metal::Renderer::Flush()
{
@autoreleasepool
{
g_state_tracker->FlushEncoders();
}
}
void Metal::Renderer::WaitForGPUIdle()
{
@autoreleasepool
{
g_state_tracker->FlushEncoders();
g_state_tracker->WaitForFlushedEncoders();
}
}
void Metal::Renderer::OnConfigChanged(u32 bits)
{
if (bits & CONFIG_CHANGE_BIT_VSYNC)
[m_layer setDisplaySyncEnabled:g_ActiveConfig.bVSyncActive];
if (bits & CONFIG_CHANGE_BIT_ANISOTROPY)
{
g_object_cache->ReloadSamplers();
g_state_tracker->ReloadSamplers();
}
}
void Metal::Renderer::ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable,
bool alpha_enable, bool z_enable, u32 color, u32 z)
{
MathUtil::Rectangle<int> target_rc = Renderer::ConvertEFBRectangle(rc);
target_rc.ClampUL(0, 0, m_target_width, m_target_height);
// All Metal render passes are fullscreen, so we can only run a fast clear if the target is too
if (target_rc == MathUtil::Rectangle<int>(0, 0, m_target_width, m_target_height))
{
// Determine whether the EFB has an alpha channel. If it doesn't, we can clear the alpha
// channel to 0xFF. This hopefully allows us to use the fast path in most cases.
if (bpmem.zcontrol.pixel_format == PixelFormat::RGB565_Z16 ||
bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 ||
bpmem.zcontrol.pixel_format == PixelFormat::Z24)
{
// Force alpha writes, and clear the alpha channel. This is different from the other backends,
// where the existing values of the alpha channel are preserved.
alpha_enable = true;
color &= 0x00FFFFFF;
}
bool c_ok = (color_enable && alpha_enable) ||
g_state_tracker->GetCurrentFramebuffer()->GetColorFormat() ==
AbstractTextureFormat::Undefined;
bool z_ok = z_enable || g_state_tracker->GetCurrentFramebuffer()->GetDepthFormat() ==
AbstractTextureFormat::Undefined;
if (c_ok && z_ok)
{
@autoreleasepool
{
// clang-format off
MTLClearColor clear_color = MTLClearColorMake(
static_cast<double>((color >> 16) & 0xFF) / 255.0,
static_cast<double>((color >> 8) & 0xFF) / 255.0,
static_cast<double>((color >> 0) & 0xFF) / 255.0,
static_cast<double>((color >> 24) & 0xFF) / 255.0);
// clang-format on
float z_normalized = static_cast<float>(z & 0xFFFFFF) / 16777216.0f;
if (!g_Config.backend_info.bSupportsReversedDepthRange)
z_normalized = 1.f - z_normalized;
g_state_tracker->BeginClearRenderPass(clear_color, z_normalized);
return;
}
}
}
g_state_tracker->EnableEncoderLabel(false);
g_framebuffer_manager->ClearEFB(rc, color_enable, alpha_enable, z_enable, color, z);
g_state_tracker->EnableEncoderLabel(true);
}
void Metal::Renderer::SetPipeline(const AbstractPipeline* pipeline)
{
g_state_tracker->SetPipeline(static_cast<const Pipeline*>(pipeline));
}
void Metal::Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer)
{
// Shouldn't be bound as a texture.
if (AbstractTexture* color = framebuffer->GetColorAttachment())
g_state_tracker->UnbindTexture(static_cast<Texture*>(color)->GetMTLTexture());
if (AbstractTexture* depth = framebuffer->GetDepthAttachment())
g_state_tracker->UnbindTexture(static_cast<Texture*>(depth)->GetMTLTexture());
m_current_framebuffer = framebuffer;
g_state_tracker->SetCurrentFramebuffer(static_cast<Framebuffer*>(framebuffer));
}
void Metal::Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer)
{
@autoreleasepool
{
SetFramebuffer(framebuffer);
g_state_tracker->BeginRenderPass(MTLLoadActionDontCare);
}
}
void Metal::Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer,
const ClearColor& color_value, float depth_value)
{
@autoreleasepool
{
SetFramebuffer(framebuffer);
MTLClearColor color =
MTLClearColorMake(color_value[0], color_value[1], color_value[2], color_value[3]);
g_state_tracker->BeginClearRenderPass(color, depth_value);
}
}
void Metal::Renderer::SetScissorRect(const MathUtil::Rectangle<int>& rc)
{
g_state_tracker->SetScissor(rc);
}
void Metal::Renderer::SetTexture(u32 index, const AbstractTexture* texture)
{
g_state_tracker->SetTexture(
index, texture ? static_cast<const Texture*>(texture)->GetMTLTexture() : nullptr);
}
void Metal::Renderer::SetSamplerState(u32 index, const SamplerState& state)
{
g_state_tracker->SetSampler(index, state);
}
void Metal::Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write)
{
g_state_tracker->SetComputeTexture(static_cast<const Texture*>(texture));
}
void Metal::Renderer::UnbindTexture(const AbstractTexture* texture)
{
g_state_tracker->UnbindTexture(static_cast<const Texture*>(texture)->GetMTLTexture());
}
void Metal::Renderer::SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth)
{
g_state_tracker->SetViewport(x, y, width, height, near_depth, far_depth);
}
void Metal::Renderer::Draw(u32 base_vertex, u32 num_vertices)
{
@autoreleasepool
{
g_state_tracker->Draw(base_vertex, num_vertices);
}
}
void Metal::Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
{
@autoreleasepool
{
g_state_tracker->DrawIndexed(base_index, num_indices, base_vertex);
}
}
void Metal::Renderer::DispatchComputeShader(const AbstractShader* shader, //
u32 groupsize_x, u32 groupsize_y, u32 groupsize_z,
u32 groups_x, u32 groups_y, u32 groups_z)
{
@autoreleasepool
{
g_state_tracker->SetPipeline(static_cast<const ComputePipeline*>(shader));
g_state_tracker->DispatchComputeShader(groupsize_x, groupsize_y, groupsize_z, //
groups_x, groups_y, groups_z);
}
}
void Metal::Renderer::BindBackbuffer(const ClearColor& clear_color)
{
@autoreleasepool
{
CheckForSurfaceChange();
CheckForSurfaceResize();
m_drawable = MRCRetain([m_layer nextDrawable]);
m_bb_texture->SetMTLTexture(MRCRetain([m_drawable texture]));
SetAndClearFramebuffer(m_backbuffer.get(), clear_color);
}
}
void Metal::Renderer::PresentBackbuffer()
{
@autoreleasepool
{
g_state_tracker->EndRenderPass();
if (m_drawable)
{
[g_state_tracker->GetRenderCmdBuf()
addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }];
m_bb_texture->SetMTLTexture(nullptr);
m_drawable = nullptr;
}
g_state_tracker->FlushEncoders();
}
}
std::unique_ptr<::BoundingBox> Metal::Renderer::CreateBoundingBox() const
{
return std::make_unique<BoundingBox>();
}
void Metal::Renderer::CheckForSurfaceChange()
{
if (!m_surface_changed.TestAndClear())
return;
m_layer = MRCRetain(static_cast<CAMetalLayer*>(m_new_surface_handle));
m_new_surface_handle = nullptr;
SetupSurface();
}
void Metal::Renderer::CheckForSurfaceResize()
{
if (!m_surface_resized.TestAndClear())
return;
SetupSurface();
}
void Metal::Renderer::SetupSurface()
{
CGSize size = [m_layer bounds].size;
// TODO: Update m_backbuffer_scale (need to make doing that not break everything)
const float backbuffer_scale = [m_layer contentsScale];
size.width *= backbuffer_scale;
size.height *= backbuffer_scale;
[m_layer setDrawableSize:size];
m_backbuffer_width = size.width;
m_backbuffer_height = size.height;
TextureConfig cfg(m_backbuffer_width, m_backbuffer_height, 1, 1, 1, m_backbuffer_format,
AbstractTextureFlag_RenderTarget);
m_bb_texture = std::make_unique<Texture>(nullptr, cfg);
m_backbuffer = std::make_unique<Framebuffer>(m_bb_texture.get(), nullptr, //
m_backbuffer_width, m_backbuffer_height, 1, 1);
}

View File

@ -0,0 +1,28 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h"
namespace Metal
{
class Shader : public AbstractShader
{
public:
explicit Shader(ShaderStage stage, std::string msl, MRCOwned<id<MTLFunction>> shader);
~Shader();
id<MTLFunction> GetShader() const { return m_shader; }
BinaryData GetBinary() const override;
private:
std::string m_msl;
MRCOwned<id<MTLFunction>> m_shader;
};
} // namespace Metal

View File

@ -0,0 +1,19 @@
#include "VideoBackends/Metal/MTLShader.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
Metal::Shader::Shader(ShaderStage stage, std::string msl, MRCOwned<id<MTLFunction>> shader)
: AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader))
{
}
Metal::Shader::~Shader()
{
g_object_cache->ShaderDestroyed(this);
}
AbstractShader::BinaryData Metal::Shader::GetBinary() const
{
return BinaryData(m_msl.begin(), m_msl.end());
}

View File

@ -0,0 +1,263 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include <atomic>
#include <memory>
#include <vector>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLTexture.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/RenderBase.h"
namespace Metal
{
class Pipeline;
class ComputePipeline;
class StateTracker
{
public:
enum class UploadBuffer
{
Other,
Uniform,
Vertex,
Index,
TextureData,
Texels,
Last = Texels
};
struct Map
{
id<MTLBuffer> gpu_buffer;
size_t gpu_offset;
void* cpu_buffer;
};
enum class AlignMask : size_t
{
None = 0,
Other = 15,
Uniform = 255,
};
StateTracker(StateTracker&&) = delete;
explicit StateTracker();
~StateTracker();
Framebuffer* GetCurrentFramebuffer() { return m_current_framebuffer; }
void SetCurrentFramebuffer(Framebuffer* framebuffer);
void BeginClearRenderPass(MTLClearColor color, float depth);
void BeginRenderPass(MTLLoadAction load_action);
void BeginRenderPass(MTLRenderPassDescriptor* descriptor);
void BeginComputePass();
MTLRenderPassDescriptor* GetRenderPassDescriptor(Framebuffer* framebuffer,
MTLLoadAction load_action);
void EndRenderPass();
void FlushEncoders();
void WaitForFlushedEncoders();
bool HasUnflushedData() { return static_cast<bool>(m_current_render_cmdbuf); }
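// m_current_draw counts the command buffer currently being recorded, while the
// completion handler installed in FlushEncoders bumps m_last_finished_draw, so
// the GPU is only idle once every submitted draw (m_current_draw - 1) has completed.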
bool GPUBusy()
{
return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
}
void ReloadSamplers();
void SetPipeline(const Pipeline* pipe);
void SetPipeline(const ComputePipeline* pipe);
void SetScissor(const MathUtil::Rectangle<int>& rect);
void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth);
void SetTexture(u32 idx, id<MTLTexture> texture);
void SetSampler(u32 idx, const SamplerState& sampler);
void SetComputeTexture(const Texture* texture);
void InvalidateUniforms(bool vertex, bool fragment);
void SetUtilityUniform(const void* buffer, size_t size);
void SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1);
void SetVerticesAndIndices(id<MTLBuffer> vertices, id<MTLBuffer> indices);
void SetBBoxBuffer(id<MTLBuffer> bbox, id<MTLFence> upload, id<MTLFence> download);
void SetVertexBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
void SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset);
/// Use around utility draws that commonly run immediately before GX draws to the same buffer
void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; }
void EnablePerfQuery(PerfQueryGroup group, u32 query_id);
void DisablePerfQuery();
void UnbindTexture(id<MTLTexture> texture);
void Draw(u32 base_vertex, u32 num_vertices);
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex);
void DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x,
u32 groups_y, u32 groups_z);
void ResolveTexture(id<MTLTexture> src, id<MTLTexture> dst, u32 layer, u32 level);
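// AlignMask values store (alignment - 1), e.g. Align(100, AlignMask::Uniform)
// rounds 100 up to 256 and Align(256, AlignMask::Uniform) leaves it at 256.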
size_t Align(size_t amt, AlignMask align)
{
return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
}
Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{
Preallocate(buffer_idx, amt);
return CommitPreallocation(buffer_idx, amt, align);
}
std::pair<void*, size_t> Preallocate(UploadBuffer buffer_idx, size_t amt);
/// Must follow a call to Preallocate whose amt is at least as large as the one provided here
Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align)
{
DEBUG_ASSERT((m_upload_buffers[static_cast<int>(buffer_idx)].usage.Pos() &
static_cast<size_t>(align)) == 0);
return CommitPreallocation(buffer_idx, Align(amt, align));
}
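// Assumed usage pattern for callers that only know an upper bound up front:
// Preallocate(buffer, max_size), write into the returned pointer, then
// CommitPreallocation(buffer, bytes_actually_written, align).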
id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
id<MTLCommandBuffer> GetRenderCmdBuf();
private:
class UsageTracker
{
struct UsageEntry
{
u64 drawno;
size_t pos;
};
std::vector<UsageEntry> m_usage;
size_t m_size = 0;
size_t m_pos = 0;
public:
size_t Size() { return m_size; }
size_t Pos() { return m_pos; }
bool PrepareForAllocation(u64 last_draw, size_t amt);
size_t Allocate(u64 current_draw, size_t amt);
void Reset(size_t new_size);
};
struct Buffer
{
UsageTracker usage;
MRCOwned<id<MTLBuffer>> mtlbuffer;
void* buffer = nullptr;
};
struct Backref;
struct PerfQueryTracker;
std::shared_ptr<Backref> m_backref;
std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
MRCOwned<id<MTLBlitCommandEncoder>> m_texture_upload_encoder;
MRCOwned<id<MTLCommandBuffer>> m_current_render_cmdbuf;
MRCOwned<id<MTLCommandBuffer>> m_last_render_cmdbuf;
MRCOwned<id<MTLRenderCommandEncoder>> m_current_render_encoder;
MRCOwned<id<MTLComputeCommandEncoder>> m_current_compute_encoder;
MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
Framebuffer* m_current_framebuffer = nullptr;
Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
u64 m_current_draw = 1;
std::atomic<u64> m_last_finished_draw{0};
MRCOwned<id<MTLTexture>> m_dummy_texture;
// MARK: State
u8 m_dirty_textures = 0;
u8 m_dirty_samplers = 0;
union Flags
{
struct
{
// clang-format off
bool has_gx_vs_uniform : 1;
bool has_gx_ps_uniform : 1;
bool has_utility_vs_uniform : 1;
bool has_utility_ps_uniform : 1;
bool has_compute_texture : 1;
bool has_pipeline : 1;
bool has_scissor : 1;
bool has_viewport : 1;
bool has_vertices : 1;
bool has_texel_buffer : 1;
bool bbox_fence : 1;
bool should_apply_label : 1;
// clang-format on
};
u16 bits = 0;
void NewEncoder()
{
Flags reset_mask;
// Set the flags you *don't* want to reset
reset_mask.should_apply_label = true;
bits &= reset_mask.bits;
}
} m_flags;
/// Things that represent the state of the encoder
struct Current
{
NSString* label;
id<MTLRenderPipelineState> pipeline;
std::array<id<MTLBuffer>, 2> vertex_buffers;
std::array<id<MTLBuffer>, 2> fragment_buffers;
u32 width;
u32 height;
MathUtil::Rectangle<int> scissor_rect;
Util::Viewport viewport;
MTLDepthClipMode depth_clip_mode;
MTLCullMode cull_mode;
DepthStencilSelector depth_stencil;
PerfQueryGroup perf_query_group;
} m_current;
std::shared_ptr<PerfQueryTracker> m_current_perf_query;
/// Things that represent what we'd *like* to have on the encoder for the next draw
struct State
{
MathUtil::Rectangle<int> scissor_rect;
Util::Viewport viewport;
const Pipeline* render_pipeline = nullptr;
const ComputePipeline* compute_pipeline = nullptr;
std::array<id<MTLTexture>, 8> textures = {};
std::array<id<MTLSamplerState>, 8> samplers = {};
std::array<float, 8> sampler_min_lod;
std::array<float, 8> sampler_max_lod;
std::array<SamplerState, 8> sampler_states;
const Texture* compute_texture = nullptr;
std::unique_ptr<u8[]> utility_uniform;
u32 utility_uniform_size = 0;
u32 utility_uniform_capacity = 0;
id<MTLBuffer> bbox = nullptr;
id<MTLFence> bbox_upload_fence = nullptr;
id<MTLFence> bbox_download_fence = nullptr;
id<MTLBuffer> vertices = nullptr;
id<MTLBuffer> indices = nullptr;
id<MTLBuffer> texels = nullptr;
u32 texel_buffer_offset0;
u32 texel_buffer_offset1;
PerfQueryGroup perf_query_group = static_cast<PerfQueryGroup>(-1);
} m_state;
u32 m_perf_query_tracker_counter = 0;
std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
void SetSamplerForce(u32 idx, const SamplerState& sampler);
Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
void CheckViewport();
void CheckScissor();
void PrepareRender();
void PrepareCompute();
};
extern std::unique_ptr<StateTracker> g_state_tracker;
} // namespace Metal

View File

@ -0,0 +1,840 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLStateTracker.h"
#include <algorithm>
#include <mutex>
#include "Common/Assert.h"
#include "Common/BitUtils.h"
#include "VideoBackends/Metal/MTLObjectCache.h"
#include "VideoBackends/Metal/MTLPerfQuery.h"
#include "VideoBackends/Metal/MTLPipeline.h"
#include "VideoBackends/Metal/MTLTexture.h"
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
static constexpr u32 PERF_QUERY_BUFFER_SIZE = 512;
std::unique_ptr<Metal::StateTracker> Metal::g_state_tracker;
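// Command buffer completion handlers capture this Backref rather than the
// StateTracker itself; ~StateTracker nulls state_tracker under the mutex, so a
// handler that fires after destruction safely does nothing.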
struct Metal::StateTracker::Backref
{
std::mutex mtx;
StateTracker* state_tracker;
explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {}
};
struct Metal::StateTracker::PerfQueryTracker
{
MRCOwned<id<MTLBuffer>> buffer;
const u64* contents;
std::vector<PerfQueryGroup> groups;
u32 query_id;
};
static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
{
// clang-format off
switch (buffer)
{
case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data";
case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
}
// clang-format on
}
// MARK: - UsageTracker
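// UsageTracker treats its upload buffer as a ring: PrepareForAllocation drops
// entries for draws the GPU has already finished, wraps m_pos when the request
// doesn't fit at the end, and returns true if the region about to be handed out
// is still referenced by an in-flight draw (the caller then orphans the buffer
// and allocates a bigger one).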
bool Metal::StateTracker::UsageTracker::PrepareForAllocation(u64 last_draw, size_t amt)
{
auto removeme = std::find_if(m_usage.begin(), m_usage.end(),
[last_draw](UsageEntry usage) { return usage.drawno > last_draw; });
if (removeme != m_usage.begin())
m_usage.erase(m_usage.begin(), removeme);
bool still_in_use = false;
const bool needs_wrap = m_pos + amt > m_size;
if (!m_usage.empty())
{
size_t used = m_usage.front().pos;
if (needs_wrap)
still_in_use = used >= m_pos || used < amt;
else
still_in_use = used >= m_pos && used < m_pos + amt;
}
if (needs_wrap)
m_pos = 0;
return still_in_use || amt > m_size;
}
size_t Metal::StateTracker::UsageTracker::Allocate(u64 current_draw, size_t amt)
{
// Allocation of zero bytes would make the buffer think it's full
// Zero bytes is useless anyway, so don't mark usage in that case
if (!amt)
return m_pos;
if (m_usage.empty() || m_usage.back().drawno != current_draw)
m_usage.push_back({current_draw, m_pos});
size_t ret = m_pos;
m_pos += amt;
return ret;
}
void Metal::StateTracker::UsageTracker::Reset(size_t new_size)
{
m_usage.clear();
m_size = new_size;
m_pos = 0;
}
// MARK: - StateTracker
Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
{
m_flags.should_apply_label = true;
for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc)
{
rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
[[rpdesc depthAttachment] setStoreAction:MTLStoreActionStore];
[[rpdesc stencilAttachment] setStoreAction:MTLStoreActionStore];
}
m_resolve_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]);
auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0];
[color0 setLoadAction:MTLLoadActionLoad];
[color0 setStoreAction:MTLStoreActionMultisampleResolve];
MTLTextureDescriptor* texdesc =
[MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
width:1
height:1
mipmapped:NO];
[texdesc setTextureType:MTLTextureType2DArray];
[texdesc setUsage:MTLTextureUsageShaderRead];
[texdesc setStorageMode:MTLStorageModePrivate];
m_dummy_texture = MRCTransfer([g_device newTextureWithDescriptor:texdesc]);
[m_dummy_texture setLabel:@"Dummy Texture"];
for (size_t i = 0; i < std::size(m_state.samplers); ++i)
{
SetSamplerForce(i, RenderState::GetLinearSamplerState());
SetTexture(i, m_dummy_texture);
}
}
Metal::StateTracker::~StateTracker()
{
FlushEncoders();
std::lock_guard<std::mutex> lock(m_backref->mtx);
m_backref->state_tracker = nullptr;
}
// MARK: BufferPair Ops
std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
{
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
if (__builtin_expect(needs_new, false))
{
// Orphan buffer
size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
while (newsize < amt)
newsize *= 2;
MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
[buffer.mtlbuffer setLabel:GetName(buffer_idx)];
ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
buffer.buffer = [buffer.mtlbuffer contents];
buffer.usage.Reset(newsize);
}
size_t pos = buffer.usage.Pos();
return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
}
Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
size_t amt)
{
Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
size_t pos = buffer.usage.Allocate(m_current_draw, amt);
Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
ret.gpu_buffer = buffer.mtlbuffer;
DEBUG_ASSERT(pos <= buffer.usage.Size() &&
"Previous code should have guaranteed there was enough space");
return ret;
}
// MARK: Render Pass / Encoder Management
id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
{
if (!m_texture_upload_cmdbuf)
{
@autoreleasepool
{
m_texture_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
[m_texture_upload_cmdbuf setLabel:@"Texture Upload"];
m_texture_upload_encoder = MRCRetain([m_texture_upload_cmdbuf blitCommandEncoder]);
[m_texture_upload_encoder setLabel:@"Texture Upload"];
}
}
return m_texture_upload_encoder;
}
id<MTLCommandBuffer> Metal::StateTracker::GetRenderCmdBuf()
{
if (!m_current_render_cmdbuf)
{
@autoreleasepool
{
m_current_render_cmdbuf = MRCRetain([g_queue commandBuffer]);
[m_current_render_cmdbuf setLabel:@"Draw"];
}
}
return m_current_render_cmdbuf;
}
void Metal::StateTracker::SetCurrentFramebuffer(Framebuffer* framebuffer)
{
if (framebuffer == m_current_framebuffer)
return;
EndRenderPass();
m_current_framebuffer = framebuffer;
}
MTLRenderPassDescriptor* Metal::StateTracker::GetRenderPassDescriptor(Framebuffer* framebuffer,
MTLLoadAction load_action)
{
const AbstractTextureFormat depth_fmt = framebuffer->GetDepthFormat();
MTLRenderPassDescriptor* desc;
if (depth_fmt == AbstractTextureFormat::Undefined)
desc = m_render_pass_desc[0];
else if (!Util::HasStencil(depth_fmt))
desc = m_render_pass_desc[1];
else
desc = m_render_pass_desc[2];
desc.colorAttachments[0].texture = framebuffer->GetColor();
desc.colorAttachments[0].loadAction = load_action;
if (depth_fmt != AbstractTextureFormat::Undefined)
{
desc.depthAttachment.texture = framebuffer->GetDepth();
desc.depthAttachment.loadAction = load_action;
if (Util::HasStencil(depth_fmt))
{
desc.stencilAttachment.texture = framebuffer->GetDepth();
desc.stencilAttachment.loadAction = load_action;
}
}
return desc;
}
void Metal::StateTracker::BeginClearRenderPass(MTLClearColor color, float depth)
{
Framebuffer* framebuffer = m_current_framebuffer;
MTLRenderPassDescriptor* desc = GetRenderPassDescriptor(framebuffer, MTLLoadActionClear);
desc.colorAttachments[0].clearColor = color;
if (framebuffer->GetDepthFormat() != AbstractTextureFormat::Undefined)
{
desc.depthAttachment.clearDepth = depth;
if (Util::HasStencil(framebuffer->GetDepthFormat()))
desc.stencilAttachment.clearStencil = 0;
}
BeginRenderPass(desc);
}
void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action)
{
if (m_current_render_encoder)
return;
BeginRenderPass(GetRenderPassDescriptor(m_current_framebuffer, load_action));
}
void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
{
EndRenderPass();
if (m_current_perf_query)
[descriptor setVisibilityResultBuffer:m_current_perf_query->buffer];
m_current_render_encoder =
MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
if (m_current_perf_query)
[descriptor setVisibilityResultBuffer:nil];
AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
if (!attachment)
attachment = m_current_framebuffer->GetDepthAttachment();
static_assert(std::is_trivially_copyable<decltype(m_current)>::value,
"Make sure we can memset this");
memset(&m_current, 0, sizeof(m_current));
m_current.width = attachment->GetWidth();
m_current.height = attachment->GetHeight();
m_current.scissor_rect = MathUtil::Rectangle<int>(0, 0, m_current.width, m_current.height);
m_current.viewport = {
0.f, 0.f, static_cast<float>(m_current.width), static_cast<float>(m_current.height),
0.f, 1.f};
m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always);
m_current.depth_clip_mode = MTLDepthClipModeClip;
m_current.cull_mode = MTLCullModeNone;
m_current.perf_query_group = static_cast<PerfQueryGroup>(-1);
m_flags.NewEncoder();
m_dirty_samplers = 0xff;
m_dirty_textures = 0xff;
CheckScissor();
CheckViewport();
ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!");
}
void Metal::StateTracker::BeginComputePass()
{
EndRenderPass();
m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
[m_current_compute_encoder setLabel:@"Compute"];
m_flags.NewEncoder();
m_dirty_samplers = 0xff;
m_dirty_textures = 0xff;
}
void Metal::StateTracker::EndRenderPass()
{
if (m_current_render_encoder)
{
if (m_flags.bbox_fence && m_state.bbox_download_fence)
[m_current_render_encoder updateFence:m_state.bbox_download_fence
afterStages:MTLRenderStageFragment];
[m_current_render_encoder endEncoding];
m_current_render_encoder = nullptr;
}
if (m_current_compute_encoder)
{
[m_current_compute_encoder endEncoding];
m_current_compute_encoder = nullptr;
}
}
void Metal::StateTracker::FlushEncoders()
{
if (!m_current_render_cmdbuf)
return;
EndRenderPass();
if (m_texture_upload_cmdbuf)
{
[m_texture_upload_encoder endEncoding];
[m_texture_upload_cmdbuf commit];
m_texture_upload_encoder = nullptr;
m_texture_upload_cmdbuf = nullptr;
}
[m_current_render_cmdbuf
addCompletedHandler:[backref = m_backref, draw = m_current_draw,
q = std::move(m_current_perf_query)](id<MTLCommandBuffer> buf) {
std::lock_guard<std::mutex> guard(backref->mtx);
if (StateTracker* tracker = backref->state_tracker)
{
// We can do the update non-atomically because we only ever update under the lock
u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed));
tracker->m_last_finished_draw.store(newval, std::memory_order_release);
if (q)
{
if (PerfQuery* query = static_cast<PerfQuery*>(g_perf_query.get()))
query->ReturnResults(q->contents, q->groups.data(), q->groups.size(), q->query_id);
tracker->m_perf_query_tracker_cache.emplace_back(std::move(q));
}
}
}];
m_current_perf_query = nullptr;
[m_current_render_cmdbuf commit];
m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
m_current_render_cmdbuf = nullptr;
m_current_draw++;
}
void Metal::StateTracker::WaitForFlushedEncoders()
{
[m_last_render_cmdbuf waitUntilCompleted];
}
void Metal::StateTracker::ReloadSamplers()
{
for (size_t i = 0; i < std::size(m_state.samplers); ++i)
m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]);
}
// MARK: State Setters
void Metal::StateTracker::SetPipeline(const Pipeline* pipe)
{
if (pipe != m_state.render_pipeline)
{
m_state.render_pipeline = pipe;
m_flags.has_pipeline = false;
}
}
void Metal::StateTracker::SetPipeline(const ComputePipeline* pipe)
{
if (pipe != m_state.compute_pipeline)
{
m_state.compute_pipeline = pipe;
m_flags.has_pipeline = false;
}
}
void Metal::StateTracker::SetScissor(const MathUtil::Rectangle<int>& rect)
{
m_state.scissor_rect = rect;
CheckScissor();
}
void Metal::StateTracker::CheckScissor()
{
auto clipped = m_state.scissor_rect;
clipped.ClampUL(0, 0, m_current.width, m_current.height);
m_flags.has_scissor = clipped == m_current.scissor_rect;
}
void Metal::StateTracker::SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth)
{
m_state.viewport = {x, y, width, height, near_depth, far_depth};
CheckViewport();
}
void Metal::StateTracker::CheckViewport()
{
m_flags.has_viewport =
0 == memcmp(&m_state.viewport, &m_current.viewport, sizeof(m_current.viewport));
}
void Metal::StateTracker::SetTexture(u32 idx, id<MTLTexture> texture)
{
ASSERT(idx < std::size(m_state.textures));
if (!texture)
texture = m_dummy_texture;
if (m_state.textures[idx] != texture)
{
m_state.textures[idx] = texture;
m_dirty_textures |= 1 << idx;
}
}
void Metal::StateTracker::SetSamplerForce(u32 idx, const SamplerState& sampler)
{
m_state.samplers[idx] = g_object_cache->GetSampler(sampler);
m_state.sampler_min_lod[idx] = sampler.tm1.min_lod;
m_state.sampler_max_lod[idx] = sampler.tm1.max_lod;
m_state.sampler_states[idx] = sampler;
m_dirty_samplers |= 1 << idx;
}
void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler)
{
ASSERT(idx < std::size(m_state.samplers));
if (m_state.sampler_states[idx] != sampler)
SetSamplerForce(idx, sampler);
}
void Metal::StateTracker::SetComputeTexture(const Texture* texture)
{
if (m_state.compute_texture != texture)
{
m_state.compute_texture = texture;
m_flags.has_compute_texture = false;
}
}
void Metal::StateTracker::UnbindTexture(id<MTLTexture> texture)
{
for (size_t i = 0; i < std::size(m_state.textures); ++i)
{
if (m_state.textures[i] == texture)
{
m_state.textures[i] = m_dummy_texture;
m_dirty_textures |= 1 << i;
}
}
}
void Metal::StateTracker::InvalidateUniforms(bool vertex, bool fragment)
{
m_flags.has_gx_vs_uniform &= !vertex;
m_flags.has_gx_ps_uniform &= !fragment;
}
void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size)
{
if (m_state.utility_uniform_capacity < size)
{
m_state.utility_uniform = std::unique_ptr<u8[]>(new u8[size]);
m_state.utility_uniform_capacity = size;
}
m_state.utility_uniform_size = size;
memcpy(m_state.utility_uniform.get(), buffer, size);
m_flags.has_utility_vs_uniform = false;
m_flags.has_utility_ps_uniform = false;
}
void Metal::StateTracker::SetTexelBuffer(id<MTLBuffer> buffer, u32 offset0, u32 offset1)
{
m_state.texels = buffer;
m_state.texel_buffer_offset0 = offset0;
m_state.texel_buffer_offset1 = offset1;
m_flags.has_texel_buffer = false;
}
void Metal::StateTracker::SetVerticesAndIndices(id<MTLBuffer> vertices, id<MTLBuffer> indices)
{
if (m_state.vertices != vertices)
{
m_flags.has_vertices = false;
m_state.vertices = vertices;
}
m_state.indices = indices;
}
void Metal::StateTracker::SetBBoxBuffer(id<MTLBuffer> bbox, id<MTLFence> upload,
id<MTLFence> download)
{
m_state.bbox = bbox;
m_state.bbox_upload_fence = upload;
m_state.bbox_download_fence = download;
}
void Metal::StateTracker::SetVertexBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset)
{
if (idx < std::size(m_current.vertex_buffers) && m_current.vertex_buffers[idx] == buffer)
{
[m_current_render_encoder setVertexBufferOffset:offset atIndex:idx];
}
else
{
[m_current_render_encoder setVertexBuffer:buffer offset:offset atIndex:idx];
m_current.vertex_buffers[idx] = buffer;
}
}
void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id<MTLBuffer> buffer, u32 offset)
{
if (idx < std::size(m_current.fragment_buffers) && m_current.fragment_buffers[idx] == buffer)
{
[m_current_render_encoder setFragmentBufferOffset:offset atIndex:idx];
}
else
{
[m_current_render_encoder setFragmentBuffer:buffer offset:offset atIndex:idx];
m_current.fragment_buffers[idx] = buffer;
}
}
std::shared_ptr<Metal::StateTracker::PerfQueryTracker> Metal::StateTracker::NewPerfQueryTracker()
{
static_cast<PerfQuery*>(g_perf_query.get())->IncCount();
// The cache is repopulated asynchronously
std::lock_guard<std::mutex> lock(m_backref->mtx);
if (m_perf_query_tracker_cache.empty())
{
// Make a new one
@autoreleasepool
{
std::shared_ptr<PerfQueryTracker> tracker = std::make_shared<PerfQueryTracker>();
const MTLResourceOptions options =
MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
id<MTLBuffer> buffer = [g_device newBufferWithLength:PERF_QUERY_BUFFER_SIZE * sizeof(u64)
options:options];
[buffer setLabel:[NSString stringWithFormat:@"PerfQuery Buffer %d",
m_perf_query_tracker_counter++]];
tracker->buffer = MRCTransfer(buffer);
tracker->contents = static_cast<const u64*>([buffer contents]);
return tracker;
}
}
else
{
// Reuse an old one
std::shared_ptr<PerfQueryTracker> tracker = std::move(m_perf_query_tracker_cache.back());
m_perf_query_tracker_cache.pop_back();
return tracker;
}
}
void Metal::StateTracker::EnablePerfQuery(PerfQueryGroup group, u32 query_id)
{
m_state.perf_query_group = group;
if (!m_current_perf_query || m_current_perf_query->query_id != query_id ||
m_current_perf_query->groups.size() == PERF_QUERY_BUFFER_SIZE)
{
if (m_current_render_encoder)
EndRenderPass();
if (!m_current_perf_query)
m_current_perf_query = NewPerfQueryTracker();
m_current_perf_query->groups.clear();
m_current_perf_query->query_id = query_id;
}
}
void Metal::StateTracker::DisablePerfQuery()
{
m_state.perf_query_group = static_cast<PerfQueryGroup>(-1);
}
// MARK: Render
// clang-format off
static constexpr NSString* LABEL_GX = @"GX Draw";
static constexpr NSString* LABEL_UTIL = @"Utility Draw";
// clang-format on
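// Returns the span from the lowest to the highest set bit,
// e.g. RangeOfBits(0b00111100) == NSMakeRange(2, 4).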
static NSRange RangeOfBits(u32 value)
{
ASSERT(value && "Value must be nonzero");
u32 low = Common::CountTrailingZeros(value);
u32 high = 31 - Common::CountLeadingZeros(value);
return NSMakeRange(low, high + 1 - low);
}
void Metal::StateTracker::PrepareRender()
{
// BeginRenderPass needs this
if (m_state.perf_query_group != static_cast<PerfQueryGroup>(-1) && !m_current_perf_query)
m_current_perf_query = NewPerfQueryTracker();
if (!m_current_render_encoder)
BeginRenderPass(MTLLoadActionLoad);
id<MTLRenderCommandEncoder> enc = m_current_render_encoder;
const Pipeline* pipe = m_state.render_pipeline;
bool is_gx = pipe->Usage() == AbstractPipelineUsage::GX;
NSString* label = is_gx ? LABEL_GX : LABEL_UTIL;
if (m_flags.should_apply_label && m_current.label != label)
{
m_current.label = label;
[m_current_render_encoder setLabel:label];
}
if (!m_flags.has_pipeline)
{
m_flags.has_pipeline = true;
if (pipe->Get() != m_current.pipeline)
{
m_current.pipeline = pipe->Get();
[enc setRenderPipelineState:pipe->Get()];
}
if (pipe->Cull() != m_current.cull_mode)
{
m_current.cull_mode = pipe->Cull();
[enc setCullMode:pipe->Cull()];
}
if (pipe->DepthStencil() != m_current.depth_stencil)
{
m_current.depth_stencil = pipe->DepthStencil();
[enc setDepthStencilState:g_object_cache->GetDepthStencil(m_current.depth_stencil)];
}
MTLDepthClipMode clip = is_gx && g_ActiveConfig.backend_info.bSupportsDepthClamp ?
MTLDepthClipModeClamp :
MTLDepthClipModeClip;
if (clip != m_current.depth_clip_mode)
{
m_current.depth_clip_mode = clip;
[enc setDepthClipMode:clip];
}
if (is_gx && m_state.bbox_upload_fence && !m_flags.bbox_fence && pipe->UsesFragmentBuffer(2))
{
m_flags.bbox_fence = true;
[enc waitForFence:m_state.bbox_upload_fence beforeStages:MTLRenderStageFragment];
[enc setFragmentBuffer:m_state.bbox offset:0 atIndex:2];
}
}
if (!m_flags.has_viewport)
{
m_flags.has_viewport = true;
m_current.viewport = m_state.viewport;
MTLViewport metal;
metal.originX = m_state.viewport.x;
metal.originY = m_state.viewport.y;
metal.width = m_state.viewport.width;
metal.height = m_state.viewport.height;
metal.znear = m_state.viewport.near_depth;
metal.zfar = m_state.viewport.far_depth;
[enc setViewport:metal];
}
if (!m_flags.has_scissor)
{
m_flags.has_scissor = true;
m_current.scissor_rect = m_state.scissor_rect;
m_current.scissor_rect.ClampUL(0, 0, m_current.width, m_current.height);
MTLScissorRect metal;
metal.x = m_current.scissor_rect.left;
metal.y = m_current.scissor_rect.top;
metal.width = m_current.scissor_rect.right - m_current.scissor_rect.left;
metal.height = m_current.scissor_rect.bottom - m_current.scissor_rect.top;
[enc setScissorRect:metal];
}
if (!m_flags.has_vertices && pipe->UsesVertexBuffer(0))
{
m_flags.has_vertices = true;
if (m_state.vertices)
SetVertexBufferNow(0, m_state.vertices, 0);
}
if (u8 dirty = m_dirty_textures & pipe->GetTextures())
{
m_dirty_textures &= ~pipe->GetTextures();
NSRange range = RangeOfBits(dirty);
[enc setFragmentTextures:&m_state.textures[range.location] withRange:range];
}
if (u8 dirty = m_dirty_samplers & pipe->GetSamplers())
{
m_dirty_samplers &= ~pipe->GetSamplers();
NSRange range = RangeOfBits(dirty);
[enc setFragmentSamplerStates:&m_state.samplers[range.location]
lodMinClamps:&m_state.sampler_min_lod[range.location]
lodMaxClamps:&m_state.sampler_max_lod[range.location]
withRange:range];
}
if (m_state.perf_query_group != m_current.perf_query_group)
{
m_current.perf_query_group = m_state.perf_query_group;
if (m_state.perf_query_group == static_cast<PerfQueryGroup>(-1))
{
[enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
}
else
{
[enc setVisibilityResultMode:MTLVisibilityResultModeCounting
offset:m_current_perf_query->groups.size() * 8];
m_current_perf_query->groups.push_back(m_state.perf_query_group);
}
}
if (is_gx)
{
// GX draw
if (!m_flags.has_gx_vs_uniform)
{
m_flags.has_gx_vs_uniform = true;
Map map = Allocate(UploadBuffer::Uniform, sizeof(VertexShaderConstants), AlignMask::Uniform);
memcpy(map.cpu_buffer, &VertexShaderManager::constants, sizeof(VertexShaderConstants));
SetVertexBufferNow(1, map.gpu_buffer, map.gpu_offset);
if (pipe->UsesFragmentBuffer(1))
SetFragmentBufferNow(1, map.gpu_buffer, map.gpu_offset);
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed,
Align(sizeof(VertexShaderConstants), AlignMask::Uniform));
}
if (!m_flags.has_gx_ps_uniform)
{
m_flags.has_gx_ps_uniform = true;
Map map = Allocate(UploadBuffer::Uniform, sizeof(PixelShaderConstants), AlignMask::Uniform);
memcpy(map.cpu_buffer, &PixelShaderManager::constants, sizeof(PixelShaderConstants));
SetFragmentBufferNow(0, map.gpu_buffer, map.gpu_offset);
ADDSTAT(g_stats.this_frame.bytes_uniform_streamed,
Align(sizeof(PixelShaderConstants), AlignMask::Uniform));
}
}
else
{
// Utility draw
if (!m_flags.has_utility_vs_uniform && pipe->UsesVertexBuffer(1))
{
m_flags.has_utility_vs_uniform = true;
m_flags.has_gx_vs_uniform = false;
[enc setVertexBytes:m_state.utility_uniform.get()
length:m_state.utility_uniform_size
atIndex:1];
}
if (!m_flags.has_utility_ps_uniform && pipe->UsesFragmentBuffer(0))
{
m_flags.has_utility_ps_uniform = true;
m_flags.has_gx_ps_uniform = false;
[enc setFragmentBytes:m_state.utility_uniform.get()
length:m_state.utility_uniform_size
atIndex:0];
}
if (!m_flags.has_texel_buffer && pipe->UsesFragmentBuffer(2))
{
m_flags.has_texel_buffer = true;
SetFragmentBufferNow(2, m_state.texels, m_state.texel_buffer_offset0);
}
}
}
void Metal::StateTracker::PrepareCompute()
{
if (!m_current_compute_encoder)
BeginComputePass();
id<MTLComputeCommandEncoder> enc = m_current_compute_encoder;
const ComputePipeline* pipe = m_state.compute_pipeline;
if (!m_flags.has_pipeline)
{
m_flags.has_pipeline = true;
[enc setComputePipelineState:pipe->GetComputePipeline()];
}
if (!m_flags.has_compute_texture && pipe->UsesTexture(0))
{
m_flags.has_compute_texture = true;
[enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0];
}
// Compute and render can't happen at the same time, so just reuse one of the flags
if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0))
{
m_flags.has_utility_vs_uniform = true;
[enc setBytes:m_state.utility_uniform.get() length:m_state.utility_uniform_size atIndex:0];
}
if (!m_flags.has_texel_buffer && pipe->UsesBuffer(2))
{
m_flags.has_texel_buffer = true;
[enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset0 atIndex:2];
if (pipe->UsesBuffer(3))
[enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset1 atIndex:3];
}
}
void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices)
{
if (!num_vertices)
return;
PrepareRender();
[m_current_render_encoder drawPrimitives:m_state.render_pipeline->Prim()
vertexStart:base_vertex
vertexCount:num_vertices];
}
void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
{
if (!num_indices)  // Happens in Metroid Prime; Metal API validation doesn't like zero-count draws
return;
PrepareRender();
[m_current_render_encoder drawIndexedPrimitives:m_state.render_pipeline->Prim()
indexCount:num_indices
indexType:MTLIndexTypeUInt16
indexBuffer:m_state.indices
indexBufferOffset:base_index * sizeof(u16)
instanceCount:1
baseVertex:base_vertex
baseInstance:0];
}
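// groupsize_* are threads per threadgroup and groups_* are threadgroup counts,
// matching Metal's dispatchThreadgroups:threadsPerThreadgroup: convention below.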
void Metal::StateTracker::DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z,
u32 groups_x, u32 groups_y, u32 groups_z)
{
PrepareCompute();
[m_current_compute_encoder
dispatchThreadgroups:MTLSizeMake(groups_x, groups_y, groups_z)
threadsPerThreadgroup:MTLSizeMake(groupsize_x, groupsize_y, groupsize_z)];
}
void Metal::StateTracker::ResolveTexture(id<MTLTexture> src, id<MTLTexture> dst, u32 layer,
u32 level)
{
EndRenderPass();
auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0];
[color0 setTexture:src];
[color0 setResolveTexture:dst];
[color0 setResolveSlice:layer];
[color0 setResolveLevel:level];
id<MTLRenderCommandEncoder> enc =
[GetRenderCmdBuf() renderCommandEncoderWithDescriptor:m_resolve_pass_desc];
[enc setLabel:@"Multisample Resolve"];
[enc endEncoding];
}

View File

@ -0,0 +1,77 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractStagingTexture.h"
#include "VideoCommon/AbstractTexture.h"
#include "VideoBackends/Metal/MRCHelpers.h"
namespace Metal
{
class Texture final : public AbstractTexture
{
public:
explicit Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config);
~Texture();
void CopyRectangleFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect, u32 src_layer,
u32 src_level, const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level) override;
void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& rect,
u32 layer, u32 level) override;
void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
size_t buffer_size) override;
id<MTLTexture> GetMTLTexture() const { return m_tex; }
void SetMTLTexture(MRCOwned<id<MTLTexture>> tex) { m_tex = std::move(tex); }
private:
MRCOwned<id<MTLTexture>> m_tex;
};
class StagingTexture final : public AbstractStagingTexture
{
public:
StagingTexture(MRCOwned<id<MTLBuffer>> buffer, StagingTextureType type,
const TextureConfig& config);
~StagingTexture();
void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle<int>& src_rect,
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect) override;
void CopyToTexture(const MathUtil::Rectangle<int>& src_rect, AbstractTexture* dst,
const MathUtil::Rectangle<int>& dst_rect, u32 dst_layer,
u32 dst_level) override;
bool Map() override;
void Unmap() override;
void Flush() override;
private:
MRCOwned<id<MTLBuffer>> m_buffer;
MRCOwned<id<MTLCommandBuffer>> m_wait_buffer;
};
class Framebuffer final : public AbstractFramebuffer
{
public:
Framebuffer(AbstractTexture* color, AbstractTexture* depth, u32 width, u32 height, u32 layers,
u32 samples);
~Framebuffer();
id<MTLTexture> GetColor() const
{
return static_cast<Texture*>(GetColorAttachment())->GetMTLTexture();
}
id<MTLTexture> GetDepth() const
{
return static_cast<Texture*>(GetDepthAttachment())->GetMTLTexture();
}
};
} // namespace Metal

View File

@ -0,0 +1,180 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLTexture.h"
#include "Common/Align.h"
#include "Common/Assert.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
Metal::Texture::Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config)
: AbstractTexture(config), m_tex(std::move(tex))
{
}
Metal::Texture::~Texture()
{
if (g_state_tracker)
g_state_tracker->UnbindTexture(m_tex);
}
void Metal::Texture::CopyRectangleFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect,
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect,
u32 dst_layer, u32 dst_level)
{
g_state_tracker->EndRenderPass();
id<MTLTexture> msrc = static_cast<const Texture*>(src)->GetMTLTexture();
id<MTLBlitCommandEncoder> blit = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder];
MTLSize size = MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
[blit setLabel:@"Texture Copy"];
[blit copyFromTexture:msrc
sourceSlice:src_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0)
sourceSize:size
toTexture:m_tex
destinationSlice:dst_layer
destinationLevel:dst_level
destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)];
[blit endEncoding];
}
void Metal::Texture::ResolveFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& rect, u32 layer, u32 level)
{
ASSERT(rect == MathUtil::Rectangle<int>(0, 0, src->GetWidth(), src->GetHeight()));
id<MTLTexture> src_tex = static_cast<const Texture*>(src)->GetMTLTexture();
g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level);
}
void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
const u8* buffer, size_t buffer_size)
{
@autoreleasepool
{
const u32 block_size = GetBlockSizeForFormat(GetFormat());
const u32 num_rows = Common::AlignUp(height, block_size) / block_size;
const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length);
const u32 upload_size = source_pitch * num_rows;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData,
upload_size, StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, buffer, upload_size);
id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder();
[encoder copyFromBuffer:map.gpu_buffer
sourceOffset:map.gpu_offset
sourceBytesPerRow:source_pitch
sourceBytesPerImage:upload_size
sourceSize:MTLSizeMake(width, height, 1)
toTexture:m_tex
destinationSlice:0
destinationLevel:level
destinationOrigin:MTLOriginMake(0, 0, 0)];
}
}
Metal::StagingTexture::StagingTexture(MRCOwned<id<MTLBuffer>> buffer, StagingTextureType type,
const TextureConfig& config)
: AbstractStagingTexture(type, config), m_buffer(std::move(buffer))
{
m_map_pointer = static_cast<char*>([m_buffer contents]);
m_map_stride = config.GetStride();
}
Metal::StagingTexture::~StagingTexture() = default;
void Metal::StagingTexture::CopyFromTexture(const AbstractTexture* src,
const MathUtil::Rectangle<int>& src_rect, //
u32 src_layer, u32 src_level,
const MathUtil::Rectangle<int>& dst_rect)
{
@autoreleasepool
{
const size_t stride = m_config.GetStride();
const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size;
const MTLSize size =
MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
g_state_tracker->EndRenderPass();
m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf());
id<MTLBlitCommandEncoder> download_encoder = [m_wait_buffer blitCommandEncoder];
[download_encoder setLabel:@"Texture Download"];
[download_encoder copyFromTexture:static_cast<const Texture*>(src)->GetMTLTexture()
sourceSlice:src_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0)
sourceSize:size
toBuffer:m_buffer
destinationOffset:offset
destinationBytesPerRow:stride
destinationBytesPerImage:stride * size.height];
[download_encoder endEncoding];
m_needs_flush = true;
}
}
void Metal::StagingTexture::CopyToTexture(const MathUtil::Rectangle<int>& src_rect, //
AbstractTexture* dst,
const MathUtil::Rectangle<int>& dst_rect, //
u32 dst_layer, u32 dst_level)
{
@autoreleasepool
{
const size_t stride = m_config.GetStride();
const u32 offset = src_rect.top * stride + src_rect.left * m_texel_size;
const MTLSize size =
MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1);
g_state_tracker->EndRenderPass();
m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf());
id<MTLBlitCommandEncoder> upload_encoder = [m_wait_buffer blitCommandEncoder];
[upload_encoder setLabel:@"Texture Upload"];
[upload_encoder copyFromBuffer:m_buffer
sourceOffset:offset
sourceBytesPerRow:stride
sourceBytesPerImage:stride * size.height
sourceSize:size
toTexture:static_cast<Texture*>(dst)->GetMTLTexture()
destinationSlice:dst_layer
destinationLevel:dst_level
destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)];
[upload_encoder endEncoding];
m_needs_flush = true;
}
}
bool Metal::StagingTexture::Map()
{
// Always mapped
return true;
}
void Metal::StagingTexture::Unmap()
{
// Always mapped
}
void Metal::StagingTexture::Flush()
{
m_needs_flush = false;
if (!m_wait_buffer)
return;
if ([m_wait_buffer status] != MTLCommandBufferStatusCompleted)
{
// Flush while we wait, since who knows how long we'll be sitting here
g_state_tracker->FlushEncoders();
[m_wait_buffer waitUntilCompleted];
}
m_wait_buffer = nullptr;
}
Metal::Framebuffer::Framebuffer(AbstractTexture* color, AbstractTexture* depth, //
u32 width, u32 height, u32 layers, u32 samples)
: AbstractFramebuffer(color, depth,
color ? color->GetFormat() : AbstractTextureFormat::Undefined, //
depth ? depth->GetFormat() : AbstractTextureFormat::Undefined, //
width, height, layers, samples)
{
}
Metal::Framebuffer::~Framebuffer() = default;

View File

@ -0,0 +1,53 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "VideoCommon/AbstractShader.h"
#include "VideoCommon/TextureConfig.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoBackends/Metal/MRCHelpers.h"
namespace Metal
{
struct DeviceFeatures
{
bool subgroup_ops;
};
extern DeviceFeatures g_features;
namespace Util
{
struct Viewport
{
float x;
float y;
float width;
float height;
float near_depth;
float far_depth;
};
/// Gets the list of Metal devices, ordered so the system default device is first
std::vector<MRCOwned<id<MTLDevice>>> GetAdapterList();
void PopulateBackendInfo(VideoConfig* config);
void PopulateBackendInfoAdapters(VideoConfig* config,
const std::vector<MRCOwned<id<MTLDevice>>>& adapters);
void PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice> device);
AbstractTextureFormat ToAbstract(MTLPixelFormat format);
MTLPixelFormat FromAbstract(AbstractTextureFormat format);
static inline bool HasStencil(AbstractTextureFormat format)
{
return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8;
}
std::optional<std::string> TranslateShaderToMSL(ShaderStage stage, std::string_view source);
} // namespace Util
} // namespace Metal

View File

@ -0,0 +1,464 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLUtil.h"
#include <fstream>
#include <string>
#include <spirv_msl.hpp>
#include "Common/MsgHandler.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/Spirv.h"
Metal::DeviceFeatures Metal::g_features;
std::vector<MRCOwned<id<MTLDevice>>> Metal::Util::GetAdapterList()
{
std::vector<MRCOwned<id<MTLDevice>>> list;
id<MTLDevice> default_dev = MTLCreateSystemDefaultDevice();
if (default_dev)
list.push_back(MRCTransfer(default_dev));
auto devices = MRCTransfer(MTLCopyAllDevices());
for (id<MTLDevice> device in devices.Get())
{
if (device != default_dev)
list.push_back(MRCRetain(device));
}
return list;
}
void Metal::Util::PopulateBackendInfo(VideoConfig* config)
{
config->backend_info.api_type = APIType::Metal;
config->backend_info.bUsesLowerLeftOrigin = false;
config->backend_info.bSupportsExclusiveFullscreen = false;
config->backend_info.bSupportsDualSourceBlend = true;
config->backend_info.bSupportsPrimitiveRestart = true;
config->backend_info.bSupportsGeometryShaders = false;
config->backend_info.bSupportsComputeShaders = true;
config->backend_info.bSupports3DVision = false;
config->backend_info.bSupportsEarlyZ = true;
config->backend_info.bSupportsBindingLayout = true;
config->backend_info.bSupportsBBox = true;
config->backend_info.bSupportsGSInstancing = false;
config->backend_info.bSupportsPostProcessing = true;
config->backend_info.bSupportsPaletteConversion = true;
config->backend_info.bSupportsClipControl = true;
config->backend_info.bSupportsSSAA = true;
config->backend_info.bSupportsFragmentStoresAndAtomics = true;
config->backend_info.bSupportsReversedDepthRange = false;
config->backend_info.bSupportsLogicOp = false;
config->backend_info.bSupportsMultithreading = false;
config->backend_info.bSupportsGPUTextureDecoding = true;
config->backend_info.bSupportsCopyToVram = true;
config->backend_info.bSupportsBitfield = true;
config->backend_info.bSupportsDynamicSamplerIndexing = true;
config->backend_info.bSupportsFramebufferFetch = false;
config->backend_info.bSupportsBackgroundCompiling = true;
config->backend_info.bSupportsLargePoints = true;
config->backend_info.bSupportsPartialDepthCopies = true;
config->backend_info.bSupportsDepthReadback = true;
config->backend_info.bSupportsShaderBinaries = false;
config->backend_info.bSupportsPipelineCacheData = false;
config->backend_info.bSupportsCoarseDerivatives = false;
config->backend_info.bSupportsTextureQueryLevels = true;
config->backend_info.bSupportsLodBiasInSampler = false;
config->backend_info.bSupportsSettingObjectNames = true;
// Metal requires multisample resolve to be done on a render pass
config->backend_info.bSupportsPartialMultisampleResolve = false;
}
void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config,
const std::vector<MRCOwned<id<MTLDevice>>>& adapters)
{
config->backend_info.Adapters.clear();
for (id<MTLDevice> adapter : adapters)
{
config->backend_info.Adapters.push_back([[adapter name] UTF8String]);
}
}
static bool DetectIntelGPUFBFetch(id<MTLDevice> dev)
{
// Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch!
// Annoyingly, the Haswell compiler successfully makes a pipeline but actually miscompiles it and
// doesn't insert any fbfetch instructions.
// The Broadwell compiler inserts the Skylake fbfetch instruction,
// but Broadwell doesn't support that. It seems to make the shader not do anything.
// So we actually have to test the thing
static constexpr const char* shader = R"(
vertex float4 fs_triangle(uint vid [[vertex_id]]) {
return float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1);
}
fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) {
return in * 2;
}
)";
auto lib = MRCTransfer([dev newLibraryWithSource:[NSString stringWithUTF8String:shader]
options:nil
error:nil]);
if (!lib)
return false;
auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[pdesc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])];
[pdesc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"fbfetch_test"])];
[[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm];
auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]);
if (!pipe)
return false;
auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]);
auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
width:1
height:1
mipmapped:false];
[tdesc setUsage:MTLTextureUsageRenderTarget];
auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]);
auto q = MRCTransfer([dev newCommandQueue]);
u32 px = 0x11223344;
memcpy([buf contents], &px, 4);
id<MTLCommandBuffer> cmdbuf = [q commandBuffer];
id<MTLBlitCommandEncoder> upload_encoder = [cmdbuf blitCommandEncoder];
[upload_encoder copyFromBuffer:buf
sourceOffset:0
sourceBytesPerRow:4
sourceBytesPerImage:4
sourceSize:MTLSizeMake(1, 1, 1)
toTexture:tex
destinationSlice:0
destinationLevel:0
destinationOrigin:MTLOriginMake(0, 0, 0)];
[upload_encoder endEncoding];
auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
[[rpdesc colorAttachments][0] setTexture:tex];
[[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad];
[[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore];
id<MTLRenderCommandEncoder> renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc];
[renc setRenderPipelineState:pipe];
[renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
[renc endEncoding];
id<MTLBlitCommandEncoder> download_encoder = [cmdbuf blitCommandEncoder];
[download_encoder copyFromTexture:tex
sourceSlice:0
sourceLevel:0
sourceOrigin:MTLOriginMake(0, 0, 0)
sourceSize:MTLSizeMake(1, 1, 1)
toBuffer:buf
destinationOffset:0
destinationBytesPerRow:4
destinationBytesPerImage:4];
[download_encoder endEncoding];
[cmdbuf commit];
[cmdbuf waitUntilCompleted];
u32 outpx;
memcpy(&outpx, [buf contents], 4);
// Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing
if (outpx == 0x22446688)
return true; // Skylake+
else if (outpx == 0x11223344)
return false; // Broadwell
else
return false; // Haswell
}
void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice> device)
{
// Initialize DriverDetails first so we can use it later
DriverDetails::Vendor vendor = DriverDetails::VENDOR_UNKNOWN;
if ([[device name] containsString:@"NVIDIA"])
vendor = DriverDetails::VENDOR_NVIDIA;
else if ([[device name] containsString:@"AMD"])
vendor = DriverDetails::VENDOR_ATI;
else if ([[device name] containsString:@"Intel"])
vendor = DriverDetails::VENDOR_INTEL;
else if ([[device name] containsString:@"Apple"])
vendor = DriverDetails::VENDOR_APPLE;
const NSOperatingSystemVersion cocoa_ver = [[NSProcessInfo processInfo] operatingSystemVersion];
double version = cocoa_ver.majorVersion * 100 + cocoa_ver.minorVersion;
DriverDetails::Init(DriverDetails::API_METAL, vendor, DriverDetails::DRIVER_APPLE, version,
DriverDetails::Family::UNKNOWN);
#if TARGET_OS_OSX
config->backend_info.bSupportsDepthClamp = true;
config->backend_info.bSupportsST3CTextures = true;
config->backend_info.bSupportsBPTCTextures = true;
#else
bool supports_mac1 = false;
bool supports_apple4 = false;
if (@available(iOS 13, *))
{
supports_mac1 = [device supportsFamily:MTLGPUFamilyMac1];
supports_apple4 = [device supportsFamily:MTLGPUFamilyApple4];
}
else
{
supports_apple4 = [device supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1];
}
config->backend_info.bSupportsDepthClamp = supports_mac1 || supports_apple4;
config->backend_info.bSupportsST3CTextures = supports_mac1;
config->backend_info.bSupportsBPTCTextures = supports_mac1;
config->backend_info.bSupportsFramebufferFetch = true;
#endif
config->backend_info.AAModes.clear();
for (u32 i = 1; i <= 64; i <<= 1)
{
if ([device supportsTextureSampleCount:i])
config->backend_info.AAModes.push_back(i);
}
g_features.subgroup_ops = false;
if (@available(macOS 10.15, iOS 13, *))
{
// Requires SIMD-scoped reduction operations
g_features.subgroup_ops =
[device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6];
config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1];
}
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_INVOCATION_ID))
g_features.subgroup_ops = false;
#if TARGET_OS_OSX
if (@available(macOS 11, *))
if (vendor == DriverDetails::VENDOR_INTEL)
config->backend_info.bSupportsFramebufferFetch |= DetectIntelGPUFBFetch(device);
#endif
if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING))
config->backend_info.bSupportsDynamicSamplerIndexing = false;
}
// clang-format off
AbstractTextureFormat Metal::Util::ToAbstract(MTLPixelFormat format)
{
switch (format)
{
case MTLPixelFormatRGBA8Unorm: return AbstractTextureFormat::RGBA8;
case MTLPixelFormatBGRA8Unorm: return AbstractTextureFormat::BGRA8;
case MTLPixelFormatBC1_RGBA: return AbstractTextureFormat::DXT1;
case MTLPixelFormatBC2_RGBA: return AbstractTextureFormat::DXT3;
case MTLPixelFormatBC3_RGBA: return AbstractTextureFormat::DXT5;
case MTLPixelFormatBC7_RGBAUnorm: return AbstractTextureFormat::BPTC;
case MTLPixelFormatR16Unorm: return AbstractTextureFormat::R16;
case MTLPixelFormatDepth16Unorm: return AbstractTextureFormat::D16;
case MTLPixelFormatDepth24Unorm_Stencil8: return AbstractTextureFormat::D24_S8;
case MTLPixelFormatR32Float: return AbstractTextureFormat::R32F;
case MTLPixelFormatDepth32Float: return AbstractTextureFormat::D32F;
case MTLPixelFormatDepth32Float_Stencil8: return AbstractTextureFormat::D32F_S8;
default: return AbstractTextureFormat::Undefined;
}
}
MTLPixelFormat Metal::Util::FromAbstract(AbstractTextureFormat format)
{
switch (format)
{
case AbstractTextureFormat::RGBA8: return MTLPixelFormatRGBA8Unorm;
case AbstractTextureFormat::BGRA8: return MTLPixelFormatBGRA8Unorm;
case AbstractTextureFormat::DXT1: return MTLPixelFormatBC1_RGBA;
case AbstractTextureFormat::DXT3: return MTLPixelFormatBC2_RGBA;
case AbstractTextureFormat::DXT5: return MTLPixelFormatBC3_RGBA;
case AbstractTextureFormat::BPTC: return MTLPixelFormatBC7_RGBAUnorm;
case AbstractTextureFormat::R16: return MTLPixelFormatR16Unorm;
case AbstractTextureFormat::D16: return MTLPixelFormatDepth16Unorm;
case AbstractTextureFormat::D24_S8: return MTLPixelFormatDepth24Unorm_Stencil8;
case AbstractTextureFormat::R32F: return MTLPixelFormatR32Float;
case AbstractTextureFormat::D32F: return MTLPixelFormatDepth32Float;
case AbstractTextureFormat::D32F_S8: return MTLPixelFormatDepth32Float_Stencil8;
case AbstractTextureFormat::Undefined: return MTLPixelFormatInvalid;
}
}
// clang-format on
// MARK: Shader Translation
static const std::string_view SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
// Always available on Metal
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define ATTRIBUTE_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y)
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SAMPLER_BINDING(x) layout(set = 1, binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8))
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z)
#define VARYING_LOCATION(x) layout(location = x)
#define FORCE_EARLY_Z layout(early_fragment_tests) in
// Metal framebuffer fetch helpers.
#define FB_FETCH_VALUE subpassLoad(in_ocol0)
// hlsl to glsl function translation
#define API_METAL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
// These were changed in Vulkan
#define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex
)";
static const std::string_view COMPUTE_SHADER_HEADER = R"(
// Target GLSL 4.5.
#version 450 core
// Always available on Metal
#extension GL_EXT_shader_8bit_storage : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
// Compute keeps the same descriptor set layout as graphics: UBOs in set 0, images in set 1, SSBOs in set 2.
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1))
#define SSBO_BINDING(x) layout(std430, set = 2, binding = x)
#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x)
// hlsl to glsl function translation
#define API_METAL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
)";
static const std::string_view SUBGROUP_HELPER_HEADER = R"(
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
static const std::string_view MSL_HEADER =
// We know our shader generator leaves unused variables.
"#pragma clang diagnostic ignored \"-Wunused-variable\"\n"
// These usually show up when the compiler doesn't think a switch is exhaustive
"#pragma clang diagnostic ignored \"-Wreturn-type\"\n";
static constexpr spirv_cross::MSLResourceBinding
MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, //
u32 msl_buffer, u32 msl_texture, u32 msl_sampler)
{
spirv_cross::MSLResourceBinding resource;
resource.stage = stage;
resource.desc_set = set;
resource.binding = binding;
resource.msl_buffer = msl_buffer;
resource.msl_texture = msl_texture;
resource.msl_sampler = msl_sampler;
return resource;
}
std::optional<std::string> Metal::Util::TranslateShaderToMSL(ShaderStage stage,
std::string_view source)
{
std::string full_source;
std::string_view header = stage == ShaderStage::Compute ? COMPUTE_SHADER_HEADER : SHADER_HEADER;
full_source.reserve(header.size() + SUBGROUP_HELPER_HEADER.size() + source.size());
full_source.append(header);
if (Metal::g_features.subgroup_ops)
full_source.append(SUBGROUP_HELPER_HEADER);
full_source.append(source);
std::optional<SPIRV::CodeVector> code;
switch (stage)
{
case ShaderStage::Vertex:
code = SPIRV::CompileVertexShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3);
break;
case ShaderStage::Geometry:
PanicAlertFmt("Tried to compile geometry shader for Metal, but Metal doesn't support them!");
break;
case ShaderStage::Pixel:
code = SPIRV::CompileFragmentShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3);
break;
case ShaderStage::Compute:
code = SPIRV::CompileComputeShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3);
break;
}
if (!code.has_value())
return std::nullopt;
// clang-format off
static const spirv_cross::MSLResourceBinding resource_bindings[] = {
MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // ps/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo
MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0
MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1
MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2
MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3
MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4
MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5
MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6
MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7
MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8
MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo
MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo
MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo
};
spirv_cross::CompilerMSL::Options options;
#if TARGET_OS_OSX
options.platform = spirv_cross::CompilerMSL::Options::macOS;
#elif TARGET_OS_IOS
options.platform = spirv_cross::CompilerMSL::Options::iOS;
#else
#error What platform is this?
#endif
// clang-format on
spirv_cross::CompilerMSL compiler(std::move(*code));
if (@available(macOS 11, iOS 14, *))
options.set_msl_version(2, 3);
else if (@available(macOS 10.15, iOS 13, *))
options.set_msl_version(2, 2);
else if (@available(macOS 10.14, iOS 12, *))
options.set_msl_version(2, 1);
else
options.set_msl_version(2, 0);
options.use_framebuffer_fetch_subpasses = true;
compiler.set_msl_options(options);
for (auto& binding : resource_bindings)
compiler.add_msl_resource_binding(binding);
std::string msl(MSL_HEADER);
msl += compiler.compile();
return msl;
}
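Editorial illustration only (not part of this change): the translated MSL would then be handed to Metal's runtime compiler. A minimal sketch, assuming an existing id<MTLDevice> named device and a GLSL pixel shader string glsl_source:
std::optional<std::string> msl = Metal::Util::TranslateShaderToMSL(ShaderStage::Pixel, glsl_source);
if (msl.has_value())
{
  NSError* error = nullptr;
  // Compile the generated MSL into a library; a real implementation would also pick an entry point.
  MRCOwned<id<MTLLibrary>> library = MRCTransfer(
      [device newLibraryWithSource:[NSString stringWithUTF8String:msl->c_str()]
                           options:nil
                             error:&error]);
  if (!library)
    PanicAlertFmt("MSL compilation failed: {}", [[error localizedDescription] UTF8String]);
}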

View File

@ -0,0 +1,23 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <Metal/Metal.h>
#include "VideoBackends/Metal/MRCHelpers.h"
#include "VideoCommon/NativeVertexFormat.h"
namespace Metal
{
class VertexFormat : public NativeVertexFormat
{
public:
VertexFormat(const PortableVertexDeclaration& vtx_decl);
MTLVertexDescriptor* Get() const { return m_desc; }
MRCOwned<MTLVertexDescriptor*> m_desc;
};
} // namespace Metal

View File

@ -0,0 +1,143 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLVertexFormat.h"
#include "VideoCommon/VertexShaderGen.h"
static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format)
{
// clang-format off
if (int_format)
{
switch (format)
{
case ComponentFormat::UByte:
switch (count)
{
case 1: return MTLVertexFormatUChar;
case 2: return MTLVertexFormatUChar2;
case 3: return MTLVertexFormatUChar3;
case 4: return MTLVertexFormatUChar4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Byte:
switch (count)
{
case 1: return MTLVertexFormatChar;
case 2: return MTLVertexFormatChar2;
case 3: return MTLVertexFormatChar3;
case 4: return MTLVertexFormatChar4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::UShort:
switch (count)
{
case 1: return MTLVertexFormatUShort;
case 2: return MTLVertexFormatUShort2;
case 3: return MTLVertexFormatUShort3;
case 4: return MTLVertexFormatUShort4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Short:
switch (count)
{
case 1: return MTLVertexFormatShort;
case 2: return MTLVertexFormatShort2;
case 3: return MTLVertexFormatShort3;
case 4: return MTLVertexFormatShort4;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Float:
switch (count)
{
case 1: return MTLVertexFormatFloat;
case 2: return MTLVertexFormatFloat2;
case 3: return MTLVertexFormatFloat3;
case 4: return MTLVertexFormatFloat4;
default: return MTLVertexFormatInvalid;
}
}
}
else
{
switch (format)
{
case ComponentFormat::UByte:
switch (count)
{
case 1: return MTLVertexFormatUCharNormalized;
case 2: return MTLVertexFormatUChar2Normalized;
case 3: return MTLVertexFormatUChar3Normalized;
case 4: return MTLVertexFormatUChar4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Byte:
switch (count)
{
case 1: return MTLVertexFormatCharNormalized;
case 2: return MTLVertexFormatChar2Normalized;
case 3: return MTLVertexFormatChar3Normalized;
case 4: return MTLVertexFormatChar4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::UShort:
switch (count)
{
case 1: return MTLVertexFormatUShortNormalized;
case 2: return MTLVertexFormatUShort2Normalized;
case 3: return MTLVertexFormatUShort3Normalized;
case 4: return MTLVertexFormatUShort4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Short:
switch (count)
{
case 1: return MTLVertexFormatShortNormalized;
case 2: return MTLVertexFormatShort2Normalized;
case 3: return MTLVertexFormatShort3Normalized;
case 4: return MTLVertexFormatShort4Normalized;
default: return MTLVertexFormatInvalid;
}
case ComponentFormat::Float:
switch (count)
{
case 1: return MTLVertexFormatFloat;
case 2: return MTLVertexFormatFloat2;
case 3: return MTLVertexFormatFloat3;
case 4: return MTLVertexFormatFloat4;
default: return MTLVertexFormatInvalid;
}
}
}
// clang-format on
}
static void SetAttribute(MTLVertexDescriptor* desc, u32 attribute, const AttributeFormat& format)
{
if (!format.enable)
return;
MTLVertexAttributeDescriptor* attr_desc = [[desc attributes] objectAtIndexedSubscript:attribute];
[attr_desc setFormat:ConvertFormat(format.type, format.components, format.integer)];
[attr_desc setOffset:format.offset];
[attr_desc setBufferIndex:0];
}
template <size_t N>
static void SetAttributes(MTLVertexDescriptor* desc, u32 attribute,
const AttributeFormat (&format)[N])
{
for (size_t i = 0; i < N; ++i)
SetAttribute(desc, attribute + i, format[i]);
}
Metal::VertexFormat::VertexFormat(const PortableVertexDeclaration& vtx_decl)
: NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new]))
{
[[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride];
SetAttribute(m_desc, SHADER_POSITION_ATTRIB, vtx_decl.position);
SetAttributes(m_desc, SHADER_NORMAL_ATTRIB, vtx_decl.normals);
SetAttributes(m_desc, SHADER_COLOR0_ATTRIB, vtx_decl.colors);
SetAttributes(m_desc, SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords);
SetAttribute(m_desc, SHADER_POSMTX_ATTRIB, vtx_decl.posmtx);
}
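Editorial example (hypothetical values, not taken from the PR): a simple position-plus-colour declaration and how it flows through ConvertFormat above.
PortableVertexDeclaration decl = {};
decl.stride = 16;
decl.position.enable = true;
decl.position.type = ComponentFormat::Float;   // non-integer, 3 components -> MTLVertexFormatFloat3
decl.position.components = 3;
decl.position.offset = 0;
decl.colors[0].enable = true;
decl.colors[0].type = ComponentFormat::UByte;  // non-integer, 4 components -> MTLVertexFormatUChar4Normalized
decl.colors[0].components = 4;
decl.colors[0].offset = 12;
Metal::VertexFormat format(decl);  // descriptor with stride 16 and attributes at offsets 0 and 12 in buffer 0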

View File

@ -0,0 +1,34 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "VideoBackends/Metal/MTLUtil.h"
#include "VideoCommon/VertexManagerBase.h"
namespace Metal
{
class VertexManager final : public VertexManagerBase
{
public:
VertexManager();
~VertexManager() override;
void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format,
u32* out_offset) override;
bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format, u32* out_palette_offset) override;
protected:
void ResetBuffer(u32 vertex_stride) override;
void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex,
u32* out_base_index) override;
void UploadUniforms() override;
private:
u32 m_vertex_offset;
u32 m_base_vertex;
};
} // namespace Metal

View File

@ -0,0 +1,94 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Metal/MTLVertexManager.h"
#include "VideoBackends/Metal/MTLStateTracker.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"
Metal::VertexManager::VertexManager()
{
}
Metal::VertexManager::~VertexManager() = default;
void Metal::VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size)
{
g_state_tracker->SetUtilityUniform(uniforms, uniforms_size);
}
bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size,
TexelBufferFormat format, u32* out_offset)
{
*out_offset = 0;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, data_size,
StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, data, data_size);
g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, 0);
return true;
}
bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size,
TexelBufferFormat format, u32* out_offset,
const void* palette_data, u32 palette_size,
TexelBufferFormat palette_format,
u32* out_palette_offset)
{
*out_offset = 0;
*out_palette_offset = 0;
const u32 aligned_data_size = g_state_tracker->Align(data_size, StateTracker::AlignMask::Other);
const u32 total_size = aligned_data_size + palette_size;
StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, total_size,
StateTracker::AlignMask::Other);
memcpy(map.cpu_buffer, data, data_size);
memcpy(static_cast<char*>(map.cpu_buffer) + aligned_data_size, palette_data, palette_size);
g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset,
map.gpu_offset + aligned_data_size);
return true;
}
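A short worked example (editorial; the byte value behind AlignMask::Other is an assumption): with a 16-byte alignment and data_size = 100, aligned_data_size becomes 112, so the texel data is written at map.gpu_offset and the palette at map.gpu_offset + 112, which are exactly the two offsets passed to SetTexelBuffer.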
void Metal::VertexManager::ResetBuffer(u32 vertex_stride)
{
const u32 max_vertex_size = 65535 * vertex_stride;
const u32 vertex_alloc = max_vertex_size + vertex_stride - 1; // for alignment
auto vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, vertex_alloc);
auto index =
g_state_tracker->Preallocate(StateTracker::UploadBuffer::Index, MAXIBUFFERSIZE * sizeof(u16));
// Align the base vertex
m_base_vertex = (vertex.second + vertex_stride - 1) / vertex_stride;
m_vertex_offset = m_base_vertex * vertex_stride - vertex.second;
m_cur_buffer_pointer = m_base_buffer_pointer = static_cast<u8*>(vertex.first) + m_vertex_offset;
m_end_buffer_pointer = m_base_buffer_pointer + max_vertex_size;
m_index_generator.Start(static_cast<u16*>(index.first));
}
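Worked example of the base-vertex alignment (editorial): if Preallocate returns a buffer offset (vertex.second) of 100 bytes and vertex_stride is 32, then m_base_vertex = (100 + 31) / 32 = 4 and m_vertex_offset = 4 * 32 - 100 = 28, so vertex writes start 28 bytes into the allocation and draws can use 4 as their base vertex without any per-draw offset fixups.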
void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices,
u32* out_base_vertex, u32* out_base_index)
{
const u32 vsize = num_vertices * vertex_stride + m_vertex_offset;
const u32 isize = num_indices * sizeof(u16);
StateTracker::Map vmap = g_state_tracker->CommitPreallocation(
StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::None);
StateTracker::Map imap = g_state_tracker->CommitPreallocation(
StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::None);
ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, vsize);
ADDSTAT(g_stats.this_frame.bytes_index_streamed, isize);
DEBUG_ASSERT(vmap.gpu_offset + m_vertex_offset == m_base_vertex * vertex_stride);
g_state_tracker->SetVerticesAndIndices(vmap.gpu_buffer, imap.gpu_buffer);
*out_base_vertex = m_base_vertex;
*out_base_index = imap.gpu_offset / sizeof(u16);
}
void Metal::VertexManager::UploadUniforms()
{
g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, PixelShaderManager::dirty);
VertexShaderManager::dirty = false;
PixelShaderManager::dirty = false;
}

View File

@ -0,0 +1,27 @@
// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include "VideoCommon/VideoBackendBase.h"
namespace Metal
{
class VideoBackend : public VideoBackendBase
{
public:
bool Initialize(const WindowSystemInfo& wsi) override;
void Shutdown() override;
std::string GetName() const override;
std::string GetDisplayName() const override;
std::optional<std::string> GetWarningMessage() const override;
void InitBackendInfo() override;
void PrepareWindow(WindowSystemInfo& wsi) override;
static constexpr const char* NAME = "Metal";
};
} // namespace Metal

View File

@ -59,6 +59,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsTextureQueryLevels = false;
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View File

@ -93,6 +93,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsLodBiasInSampler = true;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
// TODO: There is a bug here: if texel buffers or SSBOs/atomics are not supported, the graphics
// options will still show the corresponding setting even though it is not supported. The only way around this would be

View File

@ -903,8 +903,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
}
}
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
glUseProgram(static_cast<const OGLShader*>(shader)->GetGLComputeProgramID());
glDispatchCompute(groups_x, groups_y, groups_z);

View File

@ -125,8 +125,8 @@ public:
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;

View File

@ -88,6 +88,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsTextureQueryLevels = false;
g_Config.backend_info.bSupportsLodBiasInSampler = false;
g_Config.backend_info.bSupportsSettingObjectNames = false;
g_Config.backend_info.bSupportsPartialMultisampleResolve = true;
// aamodes
g_Config.backend_info.AAModes = {1};

View File

@ -155,7 +155,7 @@ void Renderer::ClearScreen(const MathUtil::Rectangle<int>& rc, bool color_enable
bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 ||
bpmem.zcontrol.pixel_format == PixelFormat::Z24)
{
// Force alpha writes, and clear the alpha channel. This is different to the other backends,
// Force alpha writes, and clear the alpha channel. This is different from the other backends,
// where the existing values of the alpha channel are preserved.
alpha_enable = true;
color &= 0x00FFFFFF;
@ -641,8 +641,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex)
base_vertex, 0);
}
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
StateTracker::GetInstance()->SetComputeShader(static_cast<const VKShader*>(shader));
if (StateTracker::GetInstance()->BindCompute())

View File

@ -77,8 +77,8 @@ public:
float far_depth) override;
void Draw(u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z) override;
void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override;
void BindBackbuffer(const ClearColor& clear_color = {}) override;
void PresentBackbuffer() override;
void SetFullscreen(bool enable_fullscreen) override;

View File

@ -294,6 +294,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support.
config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS.
config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features.
config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support.
}
void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)

View File

@ -98,6 +98,8 @@ constexpr BugInfo m_known_bugs[] = {
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true},
{API_OPENGL, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN,
BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, 108.4693462, true},
{API_VULKAN, OS_WINDOWS, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0,
@ -120,6 +122,8 @@ constexpr BugInfo m_known_bugs[] = {
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ALL, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_REVERSED_DEPTH_RANGE,
-1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY,
-1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
@ -130,6 +134,8 @@ constexpr BugInfo m_known_bugs[] = {
-1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_ATI, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true},
{API_OPENGL, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,
BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true},
{API_VULKAN, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN,
@ -140,8 +146,12 @@ constexpr BugInfo m_known_bugs[] = {
-1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_APPLE, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_APPLE, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true},
{API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN,
BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true},
};
static std::map<Bug, BugInfo> m_bugs;

View File

@ -13,7 +13,8 @@ namespace DriverDetails
enum API
{
API_OPENGL = (1 << 0),
API_VULKAN = (1 << 1)
API_VULKAN = (1 << 1),
API_METAL = (1 << 2),
};
// Enum of supported operating systems
@ -64,6 +65,7 @@ enum Driver
DRIVER_IMGTEC, // Official PowerVR driver
DRIVER_VIVANTE, // Official Vivante driver
DRIVER_PORTABILITY, // Vulkan via Metal on macOS
DRIVER_APPLE, // Metal on macOS
DRIVER_UNKNOWN // Unknown driver, default to official hardware driver
};

View File

@ -188,12 +188,23 @@ bool FramebufferManager::CreateEFBFramebuffer()
// Create resolved textures if MSAA is on
if (g_ActiveConfig.MultisamplingEnabled())
{
u32 flags = 0;
if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
flags |= AbstractTextureFlag_RenderTarget;
m_efb_resolve_color_texture = g_renderer->CreateTexture(
TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1,
efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0),
efb_color_texture_config.layers, 1, efb_color_texture_config.format, flags),
"EFB color resolve texture");
if (!m_efb_resolve_color_texture)
return false;
if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
m_efb_color_resolve_framebuffer =
g_renderer->CreateFramebuffer(m_efb_resolve_color_texture.get(), nullptr);
if (!m_efb_color_resolve_framebuffer)
return false;
}
}
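Editorial note: the AbstractTextureFlag_RenderTarget flag and the extra framebuffer are only needed on backends without partial multisample resolve support, because the fallback path in ResolveEFBColorTexture below resolves by drawing a full-screen pass into m_efb_resolve_color_texture instead of using a native resolve.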
// We also need one to convert the D24S8 to R32F if that is being used (Adreno).
@ -248,12 +259,27 @@ AbstractTexture* FramebufferManager::ResolveEFBColorTexture(const MathUtil::Rect
clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight());
// Resolve to our already-created texture.
for (u32 layer = 0; layer < GetEFBLayers(); layer++)
if (g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region,
layer, 0);
for (u32 layer = 0; layer < GetEFBLayers(); layer++)
{
m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region,
layer, 0);
}
}
else
{
m_efb_color_texture->FinishedRendering();
g_renderer->BeginUtilityDrawing();
g_renderer->SetAndDiscardFramebuffer(m_efb_color_resolve_framebuffer.get());
g_renderer->SetPipeline(m_efb_color_resolve_pipeline.get());
g_renderer->SetTexture(0, m_efb_color_texture.get());
g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState());
g_renderer->SetViewportAndScissor(clamped_region);
g_renderer->Draw(0, 3);
m_efb_resolve_color_texture->FinishedRendering();
g_renderer->EndUtilityDrawing();
}
m_efb_resolve_color_texture->FinishedRendering();
return m_efb_resolve_color_texture.get();
}
@ -487,6 +513,22 @@ bool FramebufferManager::CompileReadbackPipelines()
m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_depth_resolve_pipeline)
return false;
if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve)
{
config.framebuffer_state.color_texture_format = GetEFBColorFormat();
auto color_resolve_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel,
FramebufferShaderGen::GenerateResolveColorPixelShader(GetEFBSamples()),
"Color resolve pixel shader");
if (!color_resolve_shader)
return false;
config.pixel_shader = color_resolve_shader.get();
m_efb_color_resolve_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_color_resolve_pipeline)
return false;
}
}
// EFB restore pipeline

View File

@ -170,7 +170,9 @@ protected:
std::unique_ptr<AbstractFramebuffer> m_efb_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_convert_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_color_resolve_framebuffer;
std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer;
std::unique_ptr<AbstractPipeline> m_efb_color_resolve_pipeline;
std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline;
// Pipeline for restoring the contents of the EFB from a save state

View File

@ -34,6 +34,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1,
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -55,6 +56,7 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texture(samp{}, {})", n, coords);
@ -72,6 +74,7 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords)
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords);
@ -89,6 +92,7 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -138,6 +142,7 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo
switch (GetAPIType())
{
case APIType::D3D:
case APIType::Metal:
case APIType::OpenGL:
case APIType::Vulkan:
{
@ -333,6 +338,22 @@ std::string GenerateColorPixelShader()
return code.GetBuffer();
}
std::string GenerateResolveColorPixelShader(u32 samples)
{
ShaderCode code;
EmitSamplerDeclarations(code, 0, 1, true);
EmitPixelMainDeclaration(code, 1, 0);
code.Write("{{\n"
" int layer = int(v_tex0.z);\n"
" int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"
" ocol0 = float4(0.0f);\n");
code.Write(" for (int i = 0; i < {}; i++)\n", samples);
code.Write(" ocol0 += texelFetch(samp0, coords, i);\n");
code.Write(" ocol0 /= {}.0f;\n", samples);
code.Write("}}\n");
return code.GetBuffer();
}
std::string GenerateResolveDepthPixelShader(u32 samples)
{
ShaderCode code;

View File

@ -15,6 +15,7 @@ std::string GenerateScreenQuadVertexShader();
std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors);
std::string GenerateTextureCopyVertexShader();
std::string GenerateTextureCopyPixelShader();
std::string GenerateResolveColorPixelShader(u32 samples);
std::string GenerateResolveDepthPixelShader(u32 samples);
std::string GenerateClearVertexShader();
std::string GenerateEFBPokeVertexShader();

View File

@ -810,7 +810,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so we must be running either the Metal backend
// or Vulkan via MoltenVK if we want to use it.
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
{
if (!uid_data->no_dual_src)
{

View File

@ -113,8 +113,8 @@ public:
virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {}
// Dispatching compute shaders with currently-bound state.
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y,
u32 groups_z)
virtual void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y,
u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
}
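Editorial sketch (hypothetical, not the PR's actual Metal renderer) of why the extra groupsize_* parameters exist: OpenGL, D3D and Vulkan bake the workgroup size into the compute shader itself, but Metal supplies threads-per-threadgroup at dispatch time, so an implementation needs both the group size and the group count:
static void DispatchExample(id<MTLComputeCommandEncoder> encoder, u32 groupsize_x, u32 groupsize_y,
                            u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z)
{
  [encoder dispatchThreadgroups:MTLSizeMake(groups_x, groups_y, groups_z)
          threadsPerThreadgroup:MTLSizeMake(groupsize_x, groupsize_y, groupsize_z)];
}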

View File

@ -60,6 +60,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
case APIType::D3D:
filename += "D3D";
break;
case APIType::Metal:
filename += "Metal";
break;
case APIType::OpenGL:
filename += "OpenGL";
break;

View File

@ -55,7 +55,7 @@ CompileShaderToSPV(EShLanguage stage, APIType api_type,
glslang::TShader::ForbidIncluder includer;
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules);
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
messages = static_cast<EShMessages>(messages | EShMsgVulkanRules);
int default_version = 450;

View File

@ -2924,7 +2924,8 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con
auto dispatch_groups =
TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height);
g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1);
g_renderer->DispatchComputeShader(shader, info->group_size_x, info->group_size_y, 1,
dispatch_groups.first, dispatch_groups.second, 1);
// Copy from decoding texture -> final texture
// This is because we don't want to have to create compute view for every layer

View File

@ -519,10 +519,49 @@ UBO_BINDING(std140, 1) uniform UBO {
uint u_palette_offset;
};
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#if defined(API_METAL)
#if defined(TEXEL_BUFFER_FORMAT_R8)
SSBO_BINDING(0) readonly buffer Input { uint8_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R16)
SSBO_BINDING(0) readonly buffer Input { uint16_t s_input_buffer[]; };
#define FETCH(offset) uint(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
SSBO_BINDING(0) readonly buffer Input { u8vec4 s_input_buffer[]; };
#define FETCH(offset) uvec4(s_input_buffer[offset])
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
SSBO_BINDING(0) readonly buffer Input { uvec2 s_input_buffer[]; };
#define FETCH(offset) s_input_buffer[offset]
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
SSBO_BINDING(1) readonly buffer Palette { uint16_t s_palette_buffer[]; };
#define FETCH_PALETTE(offset) uint(s_palette_buffer[offset])
#endif
#else
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
#if defined(TEXEL_BUFFER_FORMAT_R8) || defined(TEXEL_BUFFER_FORMAT_R16)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).r
#elif defined(TEXEL_BUFFER_FORMAT_RGBA8)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset))
#elif defined(TEXEL_BUFFER_FORMAT_R32G32)
#define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).rg
#else
#error No texel buffer?
#endif
#ifdef HAS_PALETTE
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
#define FETCH_PALETTE(offset) texelFetch(s_palette_buffer, int((offset) + u_palette_offset)).r
#endif
#endif // defined(API_METAL)
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier();
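Editorial note on the FETCH macro: on Metal it expands to an SSBO load such as uint(s_input_buffer[offset]), with the source offset applied when the buffer is bound (SetTexelBuffer in MTLVertexManager passes map.gpu_offset), while on the other backends it expands to texelFetch(s_input_buffer, int((offset) + u_src_offset)). That is why the shader bodies below now start from buffer_pos = 0 instead of u_src_offset.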
@ -563,7 +602,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
{
uint2 block = coords / block_size;
uint2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
buffer_pos += offset.y * block_size.x;
@ -575,7 +614,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords)
uint4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uint val = Swap16(FETCH_PALETTE(index));
uint4 color;
#if defined(PALETTE_FORMAT_IA8)
@ -633,14 +672,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i;
if ((coords.x & 1u) == 0u)
i = Convert4To8((val >> 4));
@ -663,7 +702,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uint4 color = uint4(i, i, i, a);
@ -681,7 +720,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = FETCH(buffer_pos);
uint4 color = uint4(i, i, i, i);
float4 norm_color = float4(color) / 255.0;
@ -697,7 +736,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint a = (val & 0xFFu);
uint i = (val >> 8);
uint4 color = uint4(i, i, i, a);
@ -714,7 +753,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
@ -736,7 +775,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uint val = Swap16(FETCH(buffer_pos));
uint4 color;
if ((val & 0x8000u) != 0u)
@ -771,7 +810,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// for the entire block, then the GB channels afterwards.
uint2 block = coords.xy / 4u;
uint2 offset = coords.xy % 4u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
// Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
buffer_pos += block.y * u_src_row_stride;
@ -780,8 +819,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
buffer_pos += offset.x;
// The two GB channels follow after the block's AR channels.
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uint val1 = FETCH(buffer_pos + 0u);
uint val2 = FETCH(buffer_pos + 16u);
uint4 color;
color.a = (val1 & 0xFFu);
@ -835,14 +874,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Calculate tiled block coordinates.
uint2 tile_block_coords = block_coords / 2u;
uint2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
buffer_pos += subtile_block_coords.y * 2u;
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
uint2 raw_data = FETCH(buffer_pos);
shared_temp[block_in_group] = raw_data;
}
@ -921,14 +960,14 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
uint buffer_pos = 0;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint val = FETCH(buffer_pos);
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
@ -945,7 +984,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = FETCH(buffer_pos);
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
@ -960,7 +999,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords);
uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
uint index = Swap16(FETCH(buffer_pos)) & 0x3FFFu;
float4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, int3(int2(coords), 0), norm_color);
}
@ -976,8 +1015,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
DEFINE_MAIN(8, 8)
{
uint2 uv = gl_GlobalInvocationID.xy;
int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u));
float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos));
uint buffer_pos = (uv.y * u_src_row_stride) + (uv.x / 2u);
float4 yuyv = float4(FETCH(buffer_pos));
float y = (uv.x & 1u) != 0u ? yuyv.b : yuyv.r;
@ -1034,6 +1073,25 @@ std::string GenerateDecodingShader(TextureFormat format, std::optional<TLUTForma
}
}
switch (info->buffer_format)
{
case TEXEL_BUFFER_FORMAT_R8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R16_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R16 1\n";
break;
case TEXEL_BUFFER_FORMAT_RGBA8_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_RGBA8 1\n";
break;
case TEXEL_BUFFER_FORMAT_R32G32_UINT:
ss << "#define TEXEL_BUFFER_FORMAT_R32G32 1\n";
break;
case NUM_TEXEL_BUFFER_FORMATS:
ASSERT(0);
break;
}
ss << decoding_shader_header;
ss << info->shader_body;
@ -1121,7 +1179,10 @@ float4 DecodePixel(int val)
ss << "\n";
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
if (api_type == APIType::Metal)
ss << "SSBO_BINDING(0) readonly buffer Palette { uint16_t palette[]; };\n";
else
ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n";
ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n";
ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n";
@ -1143,9 +1204,12 @@ float4 DecodePixel(int val)
ss << "void main() {\n";
ss << " float3 coords = v_tex0;\n";
ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n";
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
if (api_type == APIType::Metal)
ss << " src = int(palette[uint(src)]);\n";
else
ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n";
ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n";
ss << " src = ((src << 8) | (src >> 8)) & 0xFFFF;\n";
ss << " ocol0 = DecodePixel(src);\n";
ss << "}\n";

View File

@ -82,7 +82,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
#ifdef __APPLE__
// Framebuffer fetch is only supported by Metal, so we must be running either the Metal backend
// or Vulkan via MoltenVK if we want to use it.
if (api_type == APIType::Vulkan)
if (api_type == APIType::Vulkan || api_type == APIType::Metal)
{
if (use_dual_source)
{

View File

@ -35,6 +35,9 @@
#ifdef HAS_VULKAN
#include "VideoBackends/Vulkan/VideoBackend.h"
#endif
#ifdef __APPLE__
#include "VideoBackends/Metal/VideoBackend.h"
#endif
#include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/BPStructs.h"
@ -227,6 +230,7 @@ const std::vector<std::unique_ptr<VideoBackendBase>>& VideoBackendBase::GetAvail
#ifdef __APPLE__
// Emplace the Vulkan backend at the beginning so it takes precedence over OpenGL.
backends.emplace(backends.begin(), std::make_unique<Vulkan::VideoBackend>());
backends.push_back(std::make_unique<Metal::VideoBackend>());
#else
backends.push_back(std::make_unique<Vulkan::VideoBackend>());
#endif

View File

@ -39,6 +39,7 @@ enum class APIType
OpenGL,
D3D,
Vulkan,
Metal,
Nothing
};

View File

@ -238,6 +238,7 @@ struct VideoConfig final
bool bSupportsTextureQueryLevels = false;
bool bSupportsLodBiasInSampler = false;
bool bSupportsSettingObjectNames = false;
bool bSupportsPartialMultisampleResolve = false;
} backend_info;
// Utility