diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 99a143f2bd..f3a6d722e2 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -631,6 +631,10 @@ if(ENABLE_VULKAN) target_link_libraries(core PUBLIC videovulkan) endif() +if(APPLE) + target_link_libraries(core PUBLIC videometal) +endif() + if(USE_MGBA) target_sources(core PRIVATE HW/GBACore.cpp diff --git a/Source/Core/VideoBackends/CMakeLists.txt b/Source/Core/VideoBackends/CMakeLists.txt index 3ab2d4c4e4..55b44f8c5d 100644 --- a/Source/Core/VideoBackends/CMakeLists.txt +++ b/Source/Core/VideoBackends/CMakeLists.txt @@ -8,6 +8,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") add_subdirectory(D3D12) endif() +if(APPLE) + add_subdirectory(Metal) +endif() + if(ENABLE_VULKAN) add_subdirectory(Vulkan) endif() diff --git a/Source/Core/VideoBackends/D3D/D3DRender.cpp b/Source/Core/VideoBackends/D3D/D3DRender.cpp index 24d7526110..988f9fb6ad 100644 --- a/Source/Core/VideoBackends/D3D/D3DRender.cpp +++ b/Source/Core/VideoBackends/D3D/D3DRender.cpp @@ -150,8 +150,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) D3D::context->DrawIndexed(num_indices, base_index, base_vertex); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { D3D::stateman->SetComputeShader(static_cast(shader)->GetD3DComputeShader()); D3D::stateman->SyncComputeBindings(); diff --git a/Source/Core/VideoBackends/D3D/D3DRender.h b/Source/Core/VideoBackends/D3D/D3DRender.h index ba247e3ec8..a0d25bc270 100644 --- a/Source/Core/VideoBackends/D3D/D3DRender.h +++ b/Source/Core/VideoBackends/D3D/D3DRender.h @@ -57,8 +57,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp index d4eb207a45..b1cb3378ed 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp @@ -365,8 +365,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { SetRootSignatures(); SetDescriptorHeaps(); diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h index eaf4544022..9624199aa4 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h @@ -69,8 +69,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; diff --git a/Source/Core/VideoBackends/Metal/CMakeLists.txt b/Source/Core/VideoBackends/Metal/CMakeLists.txt new file mode 100644 index 0000000000..a3becc1d2e --- /dev/null +++ b/Source/Core/VideoBackends/Metal/CMakeLists.txt @@ -0,0 +1,37 @@ +add_library(videometal + MRCHelpers.h + MTLBoundingBox.mm + MTLBoundingBox.h + MTLMain.mm + MTLObjectCache.h + MTLObjectCache.mm + MTLPerfQuery.mm + MTLPerfQuery.h + MTLPipeline.mm + MTLPipeline.h + MTLRenderer.mm + MTLRenderer.h + MTLShader.h + MTLStateTracker.mm + MTLStateTracker.h + MTLTexture.mm + MTLTexture.h + MTLUtil.mm + MTLUtil.h + MTLVertexManager.mm + MTLVertexManager.h + VideoBackend.h +) + +find_library(METAL_LIBRARY Metal) +find_library(QUARTZCORE_LIBRARY QuartzCore) + +target_link_libraries(videometal +PUBLIC + common + videocommon +PRIVATE + spirv_cross + ${METAL_LIBRARY} + ${QUARTZCORE_LIBRARY} +) diff --git a/Source/Core/VideoBackends/Metal/MRCHelpers.h b/Source/Core/VideoBackends/Metal/MRCHelpers.h new file mode 100644 index 0000000000..1da2e0c8d1 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MRCHelpers.h @@ -0,0 +1,81 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// clang-format off +#ifndef __OBJC__ + #error This header is for use with Objective-C++ only. +#endif +#if __has_feature(objc_arc) + #error This file is for manual reference counting! Compile without -fobjc-arc +#endif +// clang-format on + +#pragma once + +#include +#include + +/// Managed Obj-C pointer +template +class MRCOwned +{ + T ptr; + MRCOwned(T ptr) : ptr(ptr) {} + +public: + MRCOwned() : ptr(nullptr) {} + MRCOwned(std::nullptr_t) : ptr(nullptr) {} + MRCOwned(MRCOwned&& other) : ptr(other.ptr) { other.ptr = nullptr; } + MRCOwned(const MRCOwned& other) : ptr(other.ptr) { [ptr retain]; } + ~MRCOwned() + { + if (ptr) + [ptr release]; + } + operator T() const { return ptr; } + MRCOwned& operator=(const MRCOwned& other) + { + [other.ptr retain]; + if (ptr) + [ptr release]; + ptr = other.ptr; + return *this; + } + MRCOwned& operator=(MRCOwned&& other) + { + std::swap(ptr, other.ptr); + return *this; + } + void Reset() + { + [ptr release]; + ptr = nullptr; + } + T Disown() + { + T tmp = ptr; + ptr = nullptr; + return tmp; + } + T Get() const { return ptr; } + static MRCOwned Transfer(T ptr) { return MRCOwned(ptr); } + static MRCOwned Retain(T ptr) + { + [ptr retain]; + return MRCOwned(ptr); + } +}; + +/// Take ownership of an Obj-C pointer (equivalent to __bridge_transfer) +template +static inline MRCOwned MRCTransfer(T ptr) +{ + return MRCOwned::Transfer(ptr); +} + +/// Retain an Obj-C pointer (equivalent to __bridge) +template +static inline MRCOwned MRCRetain(T ptr) +{ + return MRCOwned::Retain(ptr); +} diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.h b/Source/Core/VideoBackends/Metal/MTLBoundingBox.h new file mode 100644 index 0000000000..d4a67389a2 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoCommon/BoundingBox.h" + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class BoundingBox final : public ::BoundingBox +{ +public: + ~BoundingBox() override; + + bool Initialize() override; + +protected: + std::vector Read(u32 index, u32 length) override; + void Write(u32 index, const std::vector& values) override; + +private: + BBoxType* m_cpu_buffer_ptr; + MRCOwned> m_download_fence; + MRCOwned> m_upload_fence; + MRCOwned> m_cpu_buffer; + MRCOwned> m_gpu_buffer; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm new file mode 100644 index 0000000000..89c446b040 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm @@ -0,0 +1,97 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLBoundingBox.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLStateTracker.h" + +static constexpr size_t BUFFER_SIZE = sizeof(BBoxType) * NUM_BBOX_VALUES; + +Metal::BoundingBox::~BoundingBox() +{ + if (g_state_tracker) + g_state_tracker->SetBBoxBuffer(nullptr, nullptr, nullptr); +} + +bool Metal::BoundingBox::Initialize() +{ + const MTLResourceOptions gpu_storage_mode = + g_features.unified_memory ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate; + const MTLResourceOptions gpu_options = gpu_storage_mode | MTLResourceHazardTrackingModeUntracked; + const id dev = g_device; + m_upload_fence = MRCTransfer([dev newFence]); + [m_upload_fence setLabel:@"BBox Upload Fence"]; + m_download_fence = MRCTransfer([dev newFence]); + [m_download_fence setLabel:@"BBox Download Fence"]; + m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]); + if (g_features.unified_memory) + { + [m_gpu_buffer setLabel:@"BBox Buffer"]; + m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); + } + else + { + m_cpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE + options:MTLResourceStorageModeShared]); + m_cpu_buffer_ptr = static_cast([m_cpu_buffer contents]); + [m_gpu_buffer setLabel:@"BBox GPU Buffer"]; + [m_cpu_buffer setLabel:@"BBox CPU Buffer"]; + } + g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence); + return true; +} + +std::vector Metal::BoundingBox::Read(u32 index, u32 length) +{ + @autoreleasepool + { + g_state_tracker->EndRenderPass(); + if (!g_features.unified_memory) + { + id download = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + [download setLabel:@"BBox Download"]; + [download waitForFence:m_download_fence]; + [download copyFromBuffer:m_gpu_buffer + sourceOffset:0 + toBuffer:m_cpu_buffer + destinationOffset:0 + size:BUFFER_SIZE]; + [download endEncoding]; + } + g_state_tracker->FlushEncoders(); + g_state_tracker->WaitForFlushedEncoders(); + return std::vector(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length); + } +} + +void Metal::BoundingBox::Write(u32 index, const std::vector& values) +{ + const u32 size = values.size() * sizeof(BBoxType); + if (g_features.unified_memory && !g_state_tracker->HasUnflushedData() && + !g_state_tracker->GPUBusy()) + { + // We can just write directly to the buffer! + memcpy(m_cpu_buffer_ptr + index, values.data(), size); + } + else + { + @autoreleasepool + { + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Other, size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, values.data(), size); + g_state_tracker->EndRenderPass(); + id upload = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + [upload setLabel:@"BBox Upload"]; + [upload waitForFence:m_download_fence]; + [upload copyFromBuffer:map.gpu_buffer + sourceOffset:map.gpu_offset + toBuffer:m_gpu_buffer + destinationOffset:index * sizeof(BBoxType) + size:size]; + [upload updateFence:m_upload_fence]; + [upload endEncoding]; + } + } +} diff --git a/Source/Core/VideoBackends/Metal/MTLMain.mm b/Source/Core/VideoBackends/Metal/MTLMain.mm new file mode 100644 index 0000000000..2b54d93784 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLMain.mm @@ -0,0 +1,157 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/VideoBackend.h" + +#include +#include +#include + +#include "Common/Common.h" +#include "Common/MsgHandler.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPerfQuery.h" +#include "VideoBackends/Metal/MTLRenderer.h" +#include "VideoBackends/Metal/MTLStateTracker.h" +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/VideoCommon.h" +#include "VideoCommon/VideoConfig.h" + +std::string Metal::VideoBackend::GetName() const +{ + return NAME; +} + +std::string Metal::VideoBackend::GetDisplayName() const +{ + // i18n: Apple's Metal graphics API (https://developer.apple.com/metal/) + return _trans("Metal"); +} + +std::optional Metal::VideoBackend::GetWarningMessage() const +{ + if (Util::GetAdapterList().empty()) + { + return _trans("No Metal-compatible GPUs were found. " + "Use the OpenGL backend or upgrade your computer/GPU"); + } + + return std::nullopt; +} + +static bool WindowSystemTypeSupportsMetal(WindowSystemType type) +{ + switch (type) + { + case WindowSystemType::MacOS: + return true; + default: + return false; + } +} + +bool Metal::VideoBackend::Initialize(const WindowSystemInfo& wsi) +{ + @autoreleasepool + { + if (!WindowSystemTypeSupportsMetal(wsi.type) || !wsi.render_surface) + { + PanicAlertFmt("Bad WindowSystemInfo for Metal renderer."); + return false; + } + + auto devs = Util::GetAdapterList(); + if (devs.empty()) + { + PanicAlertFmt("No Metal GPUs detected."); + return false; + } + + Util::PopulateBackendInfo(&g_Config); + Util::PopulateBackendInfoAdapters(&g_Config, devs); + + // Since we haven't called InitializeShared yet, iAdapter may be out of range, + // so we have to check it ourselves. + size_t selected_adapter_index = static_cast(g_Config.iAdapter); + if (selected_adapter_index >= devs.size()) + { + WARN_LOG_FMT(VIDEO, "Metal adapter index out of range, selecting default adapter."); + selected_adapter_index = 0; + } + MRCOwned> adapter = std::move(devs[selected_adapter_index]); + Util::PopulateBackendInfoFeatures(&g_Config, adapter); + + // With the backend information populated, we can now initialize videocommon. + InitializeShared(); + + MRCOwned layer = MRCRetain(static_cast(wsi.render_surface)); + [layer setDevice:adapter]; + if (Util::ToAbstract([layer pixelFormat]) == AbstractTextureFormat::Undefined) + [layer setPixelFormat:MTLPixelFormatBGRA8Unorm]; + CGSize size = [layer bounds].size; + float scale = [layer contentsScale]; + + ObjectCache::Initialize(std::move(adapter)); + g_state_tracker = std::make_unique(); + g_renderer = std::make_unique(std::move(layer), size.width * scale, + size.height * scale, scale); + g_vertex_manager = std::make_unique(); + g_perf_query = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_shader_cache = std::make_unique(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlertFmt("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + + return true; + } +} + +void Metal::VideoBackend::Shutdown() +{ + g_shader_cache->Shutdown(); + g_renderer->Shutdown(); + + g_shader_cache.reset(); + g_texture_cache.reset(); + g_framebuffer_manager.reset(); + g_perf_query.reset(); + g_vertex_manager.reset(); + g_renderer.reset(); + g_state_tracker.reset(); + ObjectCache::Shutdown(); + ShutdownShared(); +} + +void Metal::VideoBackend::InitBackendInfo() +{ + @autoreleasepool + { + Util::PopulateBackendInfo(&g_Config); + Util::PopulateBackendInfoAdapters(&g_Config, Util::GetAdapterList()); + } +} + +void Metal::VideoBackend::PrepareWindow(WindowSystemInfo& wsi) +{ + if (wsi.type != WindowSystemType::MacOS) + return; + NSView* view = static_cast(wsi.render_surface); + CAMetalLayer* layer = [CAMetalLayer layer]; + [view setWantsLayer:YES]; + [view setLayer:layer]; + wsi.render_surface = layer; +} diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.h b/Source/Core/VideoBackends/Metal/MTLObjectCache.h new file mode 100644 index 0000000000..d8a10d2d51 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.h @@ -0,0 +1,91 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +#include "VideoCommon/RenderState.h" + +namespace Metal +{ +extern MRCOwned> g_device; +extern MRCOwned> g_queue; + +struct DepthStencilSelector +{ + u8 value; + + DepthStencilSelector() : value(0) {} + DepthStencilSelector(bool update_enable, enum CompareMode cmp) + : value(update_enable | (static_cast(cmp) << 1)) + { + } + DepthStencilSelector(DepthState state) + : DepthStencilSelector(state.testenable ? state.updateenable : false, + state.testenable ? state.func : CompareMode::Always) + { + } + + bool UpdateEnable() const { return value & 1; } + enum CompareMode CompareMode() const { return static_cast(value >> 1); } + + bool operator==(const DepthStencilSelector& other) { return value == other.value; } + bool operator!=(const DepthStencilSelector& other) { return !(*this == other); } + static constexpr size_t N_VALUES = 1 << 4; +}; + +struct SamplerSelector +{ + u8 value; + SamplerSelector() : value(0) {} + SamplerSelector(SamplerState state) + { + value = (static_cast(state.tm0.min_filter.Value()) << 0) | + (static_cast(state.tm0.mag_filter.Value()) << 1) | + (static_cast(state.tm0.mipmap_filter.Value()) << 2) | + (static_cast(state.tm0.anisotropic_filtering) << 3); + value |= (static_cast(state.tm0.wrap_u.Value()) + + 3 * static_cast(state.tm0.wrap_v.Value())) + << 4; + } + FilterMode MinFilter() const { return static_cast(value & 1); } + FilterMode MagFilter() const { return static_cast((value >> 1) & 1); } + FilterMode MipFilter() const { return static_cast((value >> 2) & 1); } + WrapMode WrapU() const { return static_cast((value >> 4) % 3); } + WrapMode WrapV() const { return static_cast((value >> 4) / 3); } + bool AnisotropicFiltering() const { return ((value >> 3) & 1); } + + bool operator==(const SamplerSelector& other) { return value == other.value; } + bool operator!=(const SamplerSelector& other) { return !(*this == other); } + static constexpr size_t N_VALUES = (1 << 4) * 9; +}; + +class ObjectCache +{ + ObjectCache(); + +public: + ~ObjectCache(); + + static void Initialize(MRCOwned> device); + static void Shutdown(); + + id GetDepthStencil(DepthStencilSelector sel) { return m_dss[sel.value]; } + + id GetSampler(SamplerSelector sel) { return m_samplers[sel.value]; } + + id GetSampler(SamplerState state) { return GetSampler(SamplerSelector(state)); } + + void ReloadSamplers(); + +private: + MRCOwned> m_dss[DepthStencilSelector::N_VALUES]; + MRCOwned> m_samplers[SamplerSelector::N_VALUES]; +}; + +extern std::unique_ptr g_object_cache; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm new file mode 100644 index 0000000000..22809f5937 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -0,0 +1,175 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLObjectCache.h" + +#include "VideoCommon/VideoConfig.h" + +MRCOwned> Metal::g_device; +MRCOwned> Metal::g_queue; +std::unique_ptr Metal::g_object_cache; + +static void SetupDepthStencil( + MRCOwned> (&dss)[Metal::DepthStencilSelector::N_VALUES]); +static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]); + +Metal::ObjectCache::ObjectCache() +{ + SetupDepthStencil(m_dss); + SetupSamplers(m_samplers); +} + +Metal::ObjectCache::~ObjectCache() +{ +} + +void Metal::ObjectCache::Initialize(MRCOwned> device) +{ + g_device = std::move(device); + g_queue = MRCTransfer([g_device newCommandQueue]); + g_object_cache = std::unique_ptr(new ObjectCache); +} + +void Metal::ObjectCache::Shutdown() +{ + g_object_cache.reset(); + g_queue = nullptr; + g_device = nullptr; +} + +// MARK: Depth Stencil State + +// clang-format off + +static MTLCompareFunction Convert(CompareMode mode) +{ + const bool invert_depth = !g_Config.backend_info.bSupportsReversedDepthRange; + switch (mode) + { + case CompareMode::Never: return MTLCompareFunctionNever; + case CompareMode::Less: return invert_depth ? MTLCompareFunctionGreater + : MTLCompareFunctionLess; + case CompareMode::Equal: return MTLCompareFunctionEqual; + case CompareMode::LEqual: return invert_depth ? MTLCompareFunctionGreaterEqual + : MTLCompareFunctionLessEqual; + case CompareMode::Greater: return invert_depth ? MTLCompareFunctionLess + : MTLCompareFunctionGreater; + case CompareMode::NEqual: return MTLCompareFunctionNotEqual; + case CompareMode::GEqual: return invert_depth ? MTLCompareFunctionLessEqual + : MTLCompareFunctionGreaterEqual; + case CompareMode::Always: return MTLCompareFunctionAlways; + } +} + +static const char* to_string(MTLCompareFunction compare) +{ + switch (compare) + { + case MTLCompareFunctionNever: return "Never"; + case MTLCompareFunctionGreater: return "Greater"; + case MTLCompareFunctionEqual: return "Equal"; + case MTLCompareFunctionGreaterEqual: return "GEqual"; + case MTLCompareFunctionLess: return "Less"; + case MTLCompareFunctionNotEqual: return "NEqual"; + case MTLCompareFunctionLessEqual: return "LEqual"; + case MTLCompareFunctionAlways: return "Always"; + } +} + +// clang-format on + +static void SetupDepthStencil( + MRCOwned> (&dss)[Metal::DepthStencilSelector::N_VALUES]) +{ + auto desc = MRCTransfer([MTLDepthStencilDescriptor new]); + Metal::DepthStencilSelector sel; + for (size_t i = 0; i < std::size(dss); ++i) + { + sel.value = i; + MTLCompareFunction mcompare = Convert(sel.CompareMode()); + [desc setDepthWriteEnabled:sel.UpdateEnable()]; + [desc setDepthCompareFunction:mcompare]; + [desc setLabel:[NSString stringWithFormat:@"DSS %s%s", to_string(mcompare), + sel.UpdateEnable() ? "+Write" : ""]]; + dss[i] = MRCTransfer([Metal::g_device newDepthStencilStateWithDescriptor:desc]); + } +} + +// MARK: Samplers + +// clang-format off + +static MTLSamplerMinMagFilter ConvertMinMag(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return MTLSamplerMinMagFilterLinear; + case FilterMode::Near: return MTLSamplerMinMagFilterNearest; + } +} + +static MTLSamplerMipFilter ConvertMip(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return MTLSamplerMipFilterLinear; + case FilterMode::Near: return MTLSamplerMipFilterNearest; + } +} + +static MTLSamplerAddressMode Convert(WrapMode wrap) +{ + switch (wrap) + { + case WrapMode::Clamp: return MTLSamplerAddressModeClampToEdge; + case WrapMode::Mirror: return MTLSamplerAddressModeMirrorRepeat; + case WrapMode::Repeat: return MTLSamplerAddressModeRepeat; + } +} + +static const char* to_string(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return "Ln"; + case FilterMode::Near: return "Pt"; + } +} +static const char* to_string(WrapMode wrap) +{ + switch (wrap) + { + case WrapMode::Clamp: return "C"; + case WrapMode::Mirror: return "M"; + case WrapMode::Repeat: return "R"; + } +} + +// clang-format on + +static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]) +{ + auto desc = MRCTransfer([MTLSamplerDescriptor new]); + Metal::SamplerSelector sel; + for (size_t i = 0; i < std::size(samplers); i++) + { + sel.value = i; + [desc setMinFilter:ConvertMinMag(sel.MinFilter())]; + [desc setMagFilter:ConvertMinMag(sel.MagFilter())]; + [desc setMipFilter:ConvertMip(sel.MipFilter())]; + [desc setSAddressMode:Convert(sel.WrapU())]; + [desc setTAddressMode:Convert(sel.WrapV())]; + [desc setMaxAnisotropy:1 << (sel.AnisotropicFiltering() ? g_ActiveConfig.iMaxAnisotropy : 0)]; + [desc setLabel:MRCTransfer([[NSString alloc] + initWithFormat:@"%s%s%s %s%s%s", to_string(sel.MinFilter()), + to_string(sel.MagFilter()), to_string(sel.MipFilter()), + to_string(sel.WrapU()), to_string(sel.WrapV()), + sel.AnisotropicFiltering() ? "(AF)" : ""])]; + samplers[i] = MRCTransfer([Metal::g_device newSamplerStateWithDescriptor:desc]); + } +} + +void Metal::ObjectCache::ReloadSamplers() +{ + SetupSamplers(m_samplers); +} diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h new file mode 100644 index 0000000000..793cf8cec5 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h @@ -0,0 +1,20 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "VideoCommon/PerfQueryBase.h" + +namespace Metal +{ +class PerfQuery final : public PerfQueryBase +{ +public: + void EnableQuery(PerfQueryGroup type) override {} + void DisableQuery(PerfQueryGroup type) override {} + void ResetQuery() override {} + u32 GetQueryResult(PerfQueryType type) override { return 0; } + void FlushResults() override {} + bool IsFlushed() const override { return true; } +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm new file mode 100644 index 0000000000..2892bdc747 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm @@ -0,0 +1,4 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLPerfQuery.h" diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.h b/Source/Core/VideoBackends/Metal/MTLPipeline.h new file mode 100644 index 0000000000..d5a642aedd --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.h @@ -0,0 +1,64 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLShader.h" + +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" + +namespace Metal +{ +class Pipeline final : public AbstractPipeline +{ +public: + explicit Pipeline(MRCOwned> pipeline, + MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, + MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage); + + id Get() const { return m_pipeline; } + MTLPrimitiveType Prim() const { return m_prim; } + MTLCullMode Cull() const { return m_cull; } + DepthStencilSelector DepthStencil() const { return m_depth_stencil; } + AbstractPipelineUsage Usage() const { return m_usage; } + u32 GetTextures() const { return m_textures; } + u32 GetSamplers() const { return m_samplers; } + u32 GetVertexBuffers() const { return m_vertex_buffers; } + u32 GetFragmentBuffers() const { return m_fragment_buffers; } + bool UsesVertexBuffer(u32 index) const { return m_vertex_buffers & (1 << index); } + bool UsesFragmentBuffer(u32 index) const { return m_fragment_buffers & (1 << index); } + +private: + MRCOwned> m_pipeline; + MTLPrimitiveType m_prim; + MTLCullMode m_cull; + DepthStencilSelector m_depth_stencil; + AbstractPipelineUsage m_usage; + u32 m_textures = 0; + u32 m_samplers = 0; + u32 m_vertex_buffers = 0; + u32 m_fragment_buffers = 0; +}; + +class ComputePipeline : public Shader +{ +public: + explicit ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection, + std::string msl, MRCOwned> shader, + MRCOwned> pipeline); + + id GetComputePipeline() const { return m_compute_pipeline; } + bool UsesTexture(u32 index) const { return m_textures & (1 << index); } + bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); } + +private: + MRCOwned> m_compute_pipeline; + u32 m_textures = 0; + u32 m_buffers = 0; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.mm b/Source/Core/VideoBackends/Metal/MTLPipeline.mm new file mode 100644 index 0000000000..405ea76973 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.mm @@ -0,0 +1,66 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLPipeline.h" + +#include "Common/MsgHandler.h" + +static void MarkAsUsed(u32* list, u32 start, u32 length) +{ + for (u32 i = start; i < start + length; ++i) + *list |= 1 << i; +} + +static void GetArguments(NSArray* arguments, u32* textures, u32* samplers, + u32* buffers) +{ + for (MTLArgument* argument in arguments) + { + const u32 idx = [argument index]; + const u32 length = [argument arrayLength]; + if (idx + length > 32) + { + PanicAlertFmt("Making a MTLPipeline with high argument index {:d}..<{:d} for {:s}", // + idx, idx + length, [[argument name] UTF8String]); + continue; + } + switch ([argument type]) + { + case MTLArgumentTypeTexture: + if (textures) + MarkAsUsed(textures, idx, length); + else + PanicAlertFmt("Vertex function wants a texture!"); + break; + case MTLArgumentTypeSampler: + if (samplers) + MarkAsUsed(samplers, idx, length); + else + PanicAlertFmt("Vertex function wants a sampler!"); + break; + case MTLArgumentTypeBuffer: + MarkAsUsed(buffers, idx, length); + break; + default: + break; + } + } +} + +Metal::Pipeline::Pipeline(MRCOwned> pipeline, + MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, + MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage) + : m_pipeline(std::move(pipeline)), m_prim(prim), m_cull(cull), m_depth_stencil(depth), + m_usage(usage) +{ + GetArguments([reflection vertexArguments], nullptr, nullptr, &m_vertex_buffers); + GetArguments([reflection fragmentArguments], &m_textures, &m_samplers, &m_fragment_buffers); +} + +Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection, + std::string msl, MRCOwned> shader, + MRCOwned> pipeline) + : Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline)) +{ + GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers); +} diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.h b/Source/Core/VideoBackends/Metal/MTLRenderer.h new file mode 100644 index 0000000000..6208e3f57e --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.h @@ -0,0 +1,91 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoCommon/RenderBase.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class Framebuffer; +class Texture; + +class Renderer final : public ::Renderer +{ +public: + Renderer(MRCOwned layer, int width, int height, float layer_scale); + ~Renderer() override; + + bool IsHeadless() const override; + + bool Initialize() override; + + std::unique_ptr CreateTexture(const TextureConfig& config, + std::string_view name) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; + std::unique_ptr + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; + + std::unique_ptr CreateShaderFromSource(ShaderStage stage, std::string_view source, + std::string_view name) override; + std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, + size_t length, + std::string_view name) override; + std::unique_ptr CreateShaderFromMSL(ShaderStage stage, std::string msl, + std::string_view glsl, std::string_view name); + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config, + const void* cache_data = nullptr, + size_t cache_data_length = 0) override; + + void Flush() override; + void WaitForGPUIdle() override; + void OnConfigChanged(u32 bits) override; + + void ClearScreen(const MathUtil::Rectangle& rc, bool color_enable, bool alpha_enable, + bool z_enable, u32 color, u32 z) override; + + void SetPipeline(const AbstractPipeline* pipeline) override; + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, + float depth_value = 0.0f) override; + void SetScissorRect(const MathUtil::Rectangle& rc) override; + void SetTexture(u32 index, const AbstractTexture* texture) override; + void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; + void UnbindTexture(const AbstractTexture* texture) override; + void SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) override; + void Draw(u32 base_vertex, u32 num_vertices) override; + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; + void BindBackbuffer(const ClearColor& clear_color = {}) override; + void PresentBackbuffer() override; + +protected: + std::unique_ptr<::BoundingBox> CreateBoundingBox() const override; + +private: + MRCOwned m_layer; + MRCOwned> m_drawable; + std::unique_ptr m_bb_texture; + std::unique_ptr m_backbuffer; + u32 m_texture_counter = 0; + u32 m_staging_texture_counter = 0; + std::array m_shader_counter = {}; + u32 m_pipeline_counter = 0; + + void CheckForSurfaceChange(); + void CheckForSurfaceResize(); + void SetupSurface(); +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.mm b/Source/Core/VideoBackends/Metal/MTLRenderer.mm new file mode 100644 index 0000000000..b552b11722 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.mm @@ -0,0 +1,709 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLRenderer.h" + +#include "VideoBackends/Metal/MTLBoundingBox.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPipeline.h" +#include "VideoBackends/Metal/MTLStateTracker.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexShaderGen.h" +#include "VideoCommon/VideoBackendBase.h" + +Metal::Renderer::Renderer(MRCOwned layer, int width, int height, float layer_scale) + : ::Renderer(width, height, layer_scale, Util::ToAbstract([layer pixelFormat])), + m_layer(std::move(layer)) +{ + UpdateActiveConfig(); +} + +Metal::Renderer::~Renderer() = default; + +bool Metal::Renderer::IsHeadless() const +{ + return m_layer == nullptr; +} + +bool Metal::Renderer::Initialize() +{ + if (!::Renderer::Initialize()) + return false; + SetupSurface(); + g_state_tracker->FlushEncoders(); + return true; +} + +// MARK: Texture Creation + +std::unique_ptr Metal::Renderer::CreateTexture(const TextureConfig& config, + std::string_view name) +{ + @autoreleasepool + { + MRCOwned desc = MRCTransfer([MTLTextureDescriptor new]); + [desc setTextureType:config.samples > 1 ? MTLTextureType2DMultisampleArray : + MTLTextureType2DArray]; + [desc setPixelFormat:Util::FromAbstract(config.format)]; + [desc setWidth:config.width]; + [desc setHeight:config.height]; + [desc setMipmapLevelCount:config.levels]; + [desc setArrayLength:config.layers]; + [desc setSampleCount:config.samples]; + [desc setStorageMode:MTLStorageModePrivate]; + MTLTextureUsage usage = MTLTextureUsageShaderRead; + if (config.IsRenderTarget()) + usage |= MTLTextureUsageRenderTarget; + if (config.IsComputeImage()) + usage |= MTLTextureUsageShaderWrite; + [desc setUsage:usage]; + id texture = [g_device newTextureWithDescriptor:desc]; + if (!texture) + return nullptr; + + if (name.empty()) + [texture setLabel:[NSString stringWithFormat:@"Texture %d", m_texture_counter++]]; + else + [texture setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data() + length:name.size() + encoding:NSUTF8StringEncoding])]; + return std::make_unique(MRCTransfer(texture), config); + } +} + +std::unique_ptr +Metal::Renderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config) +{ + @autoreleasepool + { + const size_t stride = config.GetStride(); + const size_t buffer_size = stride * static_cast(config.height); + + MTLResourceOptions options = MTLStorageModeShared; + if (type == StagingTextureType::Upload) + options |= MTLResourceCPUCacheModeWriteCombined; + + id buffer = [g_device newBufferWithLength:buffer_size options:options]; + if (!buffer) + return nullptr; + [buffer + setLabel:[NSString stringWithFormat:@"Staging Texture %d", m_staging_texture_counter++]]; + return std::make_unique(MRCTransfer(buffer), type, config); + } +} + +std::unique_ptr +Metal::Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) +{ + AbstractTexture* const either_attachment = color_attachment ? color_attachment : depth_attachment; + return std::make_unique( + color_attachment, depth_attachment, either_attachment->GetWidth(), + either_attachment->GetHeight(), either_attachment->GetLayers(), + either_attachment->GetSamples()); +} + +// MARK: Pipeline Creation + +namespace Metal +{ +class VertexFormat : public NativeVertexFormat +{ +public: + VertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new])) + { + [[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride]; + SetAttribute(SHADER_POSITION_ATTRIB, vtx_decl.position); + SetAttributes(SHADER_NORMAL_ATTRIB, vtx_decl.normals); + SetAttributes(SHADER_COLOR0_ATTRIB, vtx_decl.colors); + SetAttributes(SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords); + SetAttribute(SHADER_POSMTX_ATTRIB, vtx_decl.posmtx); + } + + MTLVertexDescriptor* Get() const { return m_desc; } + +private: + template + void SetAttributes(u32 attribute, const AttributeFormat (&format)[N]) + { + for (size_t i = 0; i < N; i++) + SetAttribute(attribute + i, format[i]); + } + void SetAttribute(u32 attribute, const AttributeFormat& format) + { + if (!format.enable) + return; + MTLVertexAttributeDescriptor* desc = [[m_desc attributes] objectAtIndexedSubscript:attribute]; + [desc setFormat:ConvertFormat(format.type, format.components, format.integer)]; + [desc setOffset:format.offset]; + [desc setBufferIndex:0]; + } + + static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format) + { + static constexpr MTLVertexFormat formats[2][5][4] = { + [false] = { + [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized }, + [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatCharNormalized, MTLVertexFormatChar2Normalized, MTLVertexFormatChar3Normalized, MTLVertexFormatChar4Normalized }, + [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, MTLVertexFormatUShort4Normalized }, + [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShortNormalized, MTLVertexFormatShort2Normalized, MTLVertexFormatShort3Normalized, MTLVertexFormatShort4Normalized }, + [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, + }, + [true] = { + [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4 }, + [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4 }, + [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4 }, + [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4 }, + [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, + }, + }; + return formats[int_format][static_cast(format)][count - 1]; + } + + MRCOwned m_desc; +}; +} // namespace Metal + +std::unique_ptr Metal::Renderer::CreateShaderFromSource(ShaderStage stage, + std::string_view source, + std::string_view name) +{ + std::optional msl = Util::TranslateShaderToMSL(stage, source); + if (!msl.has_value()) + { + PanicAlertFmt("Failed to convert shader {} to MSL", name); + return nullptr; + } + + return CreateShaderFromMSL(stage, std::move(*msl), source, name); +} + +std::unique_ptr Metal::Renderer::CreateShaderFromBinary(ShaderStage stage, + const void* data, + size_t length, + std::string_view name) +{ + return CreateShaderFromMSL(stage, std::string(static_cast(data), length), {}, name); +} + +// clang-format off + +static const char* StageFilename(ShaderStage stage) +{ + switch (stage) + { + case ShaderStage::Vertex: return "vs"; + case ShaderStage::Geometry: return "gs"; + case ShaderStage::Pixel: return "ps"; + case ShaderStage::Compute: return "cs"; + } +} + +static NSString* GenericShaderName(ShaderStage stage) +{ + switch (stage) + { + case ShaderStage::Vertex: return @"Vertex shader %d"; + case ShaderStage::Geometry: return @"Geometry shader %d"; + case ShaderStage::Pixel: return @"Pixel shader %d"; + case ShaderStage::Compute: return @"Compute shader %d"; + } +} + +// clang-format on + +std::unique_ptr Metal::Renderer::CreateShaderFromMSL(ShaderStage stage, + std::string msl, + std::string_view glsl, + std::string_view name) +{ + @autoreleasepool + { + NSError* err = nullptr; + auto DumpBadShader = [&](std::string_view msg) { + static int counter = 0; + std::string filename = VideoBackendBase::BadShaderFilename(StageFilename(stage), counter++); + std::ofstream stream(filename); + if (stream.good()) + { + stream << msl << std::endl; + stream << "/*" << std::endl; + stream << msg << std::endl; + stream << "Error:" << std::endl; + stream << [[err localizedDescription] UTF8String] << std::endl; + if (!glsl.empty()) + { + stream << "Original GLSL:" << std::endl; + stream << glsl << std::endl; + } + else + { + stream << "Shader was created with cached MSL so no GLSL is available." << std::endl; + } + } + + stream << std::endl; + stream << "Dolphin Version: " << Common::GetScmRevStr() << std::endl; + stream << "Video Backend: " << g_video_backend->GetDisplayName() << std::endl; + stream << "*/" << std::endl; + stream.close(); + + PanicAlertFmt("{} (written to {})\n", msg, filename); + }; + + auto lib = MRCTransfer([g_device newLibraryWithSource:[NSString stringWithUTF8String:msl.data()] + options:nil + error:&err]); + if (err) + { + DumpBadShader(fmt::format("Failed to compile {}", name)); + return nullptr; + } + auto fn = MRCTransfer([lib newFunctionWithName:@"main0"]); + if (!fn) + { + DumpBadShader(fmt::format("Shader {} is missing its main0 function", name)); + return nullptr; + } + if (!name.empty()) + [fn setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data() + length:name.size() + encoding:NSUTF8StringEncoding])]; + else + [fn setLabel:[NSString stringWithFormat:GenericShaderName(stage), + m_shader_counter[static_cast(stage)]++]]; + [lib setLabel:[fn label]]; + if (stage == ShaderStage::Compute) + { + MTLComputePipelineReflection* reflection = nullptr; + auto desc = [MTLComputePipelineDescriptor new]; + [desc setComputeFunction:fn]; + [desc setLabel:[fn label]]; + MRCOwned> pipeline = + MRCTransfer([g_device newComputePipelineStateWithDescriptor:desc + options:MTLPipelineOptionArgumentInfo + reflection:&reflection + error:&err]); + if (err) + { + DumpBadShader(fmt::format("Failed to compile compute pipeline {}", name)); + return nullptr; + } + return std::make_unique(stage, reflection, std::move(msl), std::move(fn), + std::move(pipeline)); + } + return std::make_unique(stage, std::move(msl), std::move(fn)); + } +} + +std::unique_ptr +Metal::Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + @autoreleasepool + { + return std::make_unique(vtx_decl); + } +} + +static MTLPrimitiveTopologyClass GetClass(PrimitiveType prim) +{ + switch (prim) + { + case PrimitiveType::Points: + return MTLPrimitiveTopologyClassPoint; + case PrimitiveType::Lines: + return MTLPrimitiveTopologyClassLine; + case PrimitiveType::Triangles: + case PrimitiveType::TriangleStrip: + return MTLPrimitiveTopologyClassTriangle; + } +} + +static MTLPrimitiveType Convert(PrimitiveType prim) +{ + switch (prim) + { + case PrimitiveType::Points: return MTLPrimitiveTypePoint; + case PrimitiveType::Lines: return MTLPrimitiveTypeLine; + case PrimitiveType::Triangles: return MTLPrimitiveTypeTriangle; + case PrimitiveType::TriangleStrip: return MTLPrimitiveTypeTriangleStrip; + } +} + +static MTLCullMode Convert(CullMode cull) +{ + switch (cull) + { + case CullMode::None: + case CullMode::All: // Handled by disabling rasterization + return MTLCullModeNone; + case CullMode::Front: + return MTLCullModeFront; + case CullMode::Back: + return MTLCullModeBack; + } +} + +static MTLBlendFactor Convert(DstBlendFactor factor, bool src1) +{ + static constexpr MTLBlendFactor factors[2][8] = { + [false] = { + [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, + [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSourceColor, + [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, + [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, + [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + [true] = { + [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, + [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSource1Color, + [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, + [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, + [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + }; + return factors[src1][static_cast(factor)]; +} + +static MTLBlendFactor Convert(SrcBlendFactor factor, bool src1) +{ + static constexpr MTLBlendFactor factors[2][8] = { + [false] = { + [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, + [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, + [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, + [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, + [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + [true] = { + [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, + [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, + [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, + [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, + [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + }; + return factors[src1][static_cast(factor)]; +} + +std::unique_ptr +Metal::Renderer::CreatePipeline(const AbstractPipelineConfig& config, const void* cache_data, + size_t cache_data_length) +{ + @autoreleasepool + { + assert(!config.geometry_shader); + auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [desc setLabel:[NSString stringWithFormat:@"Pipeline %d", m_pipeline_counter++]]; + [desc setVertexFunction:static_cast(config.vertex_shader)->GetShader()]; + [desc setFragmentFunction:static_cast(config.pixel_shader)->GetShader()]; + if (config.vertex_format) + [desc setVertexDescriptor:static_cast(config.vertex_format)->Get()]; + RasterizationState rs = config.rasterization_state; + [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; + if (rs.cullmode == CullMode::All) + [desc setRasterizationEnabled:NO]; + MTLRenderPipelineColorAttachmentDescriptor* color0 = [desc colorAttachments][0]; + BlendingState bs = config.blending_state; + MTLColorWriteMask mask = MTLColorWriteMaskNone; + if (bs.colorupdate) + mask |= MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue; + if (bs.alphaupdate) + mask |= MTLColorWriteMaskAlpha; + [color0 setWriteMask:mask]; + if (bs.blendenable) + { + [color0 setBlendingEnabled:YES]; + [color0 setSourceRGBBlendFactor: Convert(bs.srcfactor, bs.usedualsrc)]; + [color0 setSourceAlphaBlendFactor: Convert(bs.srcfactoralpha, bs.usedualsrc)]; + [color0 setDestinationRGBBlendFactor: Convert(bs.dstfactor, bs.usedualsrc)]; + [color0 setDestinationAlphaBlendFactor:Convert(bs.dstfactoralpha, bs.usedualsrc)]; + [color0 setRgbBlendOperation: bs.subtract ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + [color0 setAlphaBlendOperation:bs.subtractAlpha ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + } + FramebufferState fs = config.framebuffer_state; + [color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)]; + [desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + if (Util::HasStencil(fs.depth_texture_format)) + [desc setStencilAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + NSError* err = nullptr; + MTLRenderPipelineReflection* reflection = nullptr; + id pipe = + [g_device newRenderPipelineStateWithDescriptor:desc + options:MTLPipelineOptionArgumentInfo + reflection:&reflection + error:&err]; + if (err) + { + PanicAlertFmt("Failed to compile pipeline for {} and {}: {}", + [[[desc vertexFunction] label] UTF8String], + [[[desc fragmentFunction] label] UTF8String], + [[err localizedDescription] UTF8String]); + return nullptr; + } + return std::make_unique(MRCTransfer(pipe), reflection, Convert(rs.primitive), + Convert(rs.cullmode), config.depth_state, config.usage); + } +} + +void Metal::Renderer::Flush() +{ + @autoreleasepool + { + g_state_tracker->FlushEncoders(); + } +} + +void Metal::Renderer::WaitForGPUIdle() +{ + @autoreleasepool + { + g_state_tracker->FlushEncoders(); + g_state_tracker->WaitForFlushedEncoders(); + } +} + +void Metal::Renderer::OnConfigChanged(u32 bits) +{ + if (bits & CONFIG_CHANGE_BIT_VSYNC) + [m_layer setDisplaySyncEnabled:g_ActiveConfig.bVSyncActive]; + + if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) + { + g_object_cache->ReloadSamplers(); + g_state_tracker->ReloadSamplers(); + } +} + +void Metal::Renderer::ClearScreen(const MathUtil::Rectangle& rc, bool color_enable, + bool alpha_enable, bool z_enable, u32 color, u32 z) +{ + MathUtil::Rectangle target_rc = Renderer::ConvertEFBRectangle(rc); + target_rc.ClampUL(0, 0, m_target_width, m_target_height); + + // All Metal render passes are fullscreen, so we can only run a fast clear if the target is too + if (target_rc == MathUtil::Rectangle(0, 0, m_target_width, m_target_height)) + { + // Determine whether the EFB has an alpha channel. If it doesn't, we can clear the alpha + // channel to 0xFF. This hopefully allows us to use the fast path in most cases. + if (bpmem.zcontrol.pixel_format == PixelFormat::RGB565_Z16 || + bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 || + bpmem.zcontrol.pixel_format == PixelFormat::Z24) + { + // Force alpha writes, and clear the alpha channel. This is different to the other backends, + // where the existing values of the alpha channel are preserved. + alpha_enable = true; + color &= 0x00FFFFFF; + } + + bool c_ok = (color_enable && alpha_enable) || + g_state_tracker->GetCurrentFramebuffer()->GetColorFormat() == + AbstractTextureFormat::Undefined; + bool z_ok = z_enable || g_state_tracker->GetCurrentFramebuffer()->GetDepthFormat() == + AbstractTextureFormat::Undefined; + if (c_ok && z_ok) + { + @autoreleasepool + { + // clang-format off + MTLClearColor clear_color = MTLClearColorMake( + static_cast((color >> 16) & 0xFF) / 255.0, + static_cast((color >> 8) & 0xFF) / 255.0, + static_cast((color >> 0) & 0xFF) / 255.0, + static_cast((color >> 24) & 0xFF) / 255.0); + // clang-format on + float z_normalized = static_cast(z & 0xFFFFFF) / 16777216.0f; + if (!g_Config.backend_info.bSupportsReversedDepthRange) + z_normalized = 1.f - z_normalized; + g_state_tracker->BeginClearRenderPass(clear_color, z_normalized); + return; + } + } + } + + g_state_tracker->EnableEncoderLabel(false); + g_framebuffer_manager->ClearEFB(rc, color_enable, alpha_enable, z_enable, color, z); + g_state_tracker->EnableEncoderLabel(true); +} + +void Metal::Renderer::SetPipeline(const AbstractPipeline* pipeline) +{ + g_state_tracker->SetPipeline(static_cast(pipeline)); +} + +void Metal::Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) +{ + // Shouldn't be bound as a texture. + if (AbstractTexture* color = framebuffer->GetColorAttachment()) + g_state_tracker->UnbindTexture(static_cast(color)->GetMTLTexture()); + if (AbstractTexture* depth = framebuffer->GetDepthAttachment()) + g_state_tracker->UnbindTexture(static_cast(depth)->GetMTLTexture()); + + m_current_framebuffer = framebuffer; + g_state_tracker->SetCurrentFramebuffer(static_cast(framebuffer)); +} + +void Metal::Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) +{ + @autoreleasepool + { + SetFramebuffer(framebuffer); + g_state_tracker->BeginRenderPass(MTLLoadActionDontCare); + } +} + +void Metal::Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value, float depth_value) +{ + @autoreleasepool + { + SetFramebuffer(framebuffer); + MTLClearColor color = + MTLClearColorMake(color_value[0], color_value[1], color_value[2], color_value[3]); + g_state_tracker->BeginClearRenderPass(color, depth_value); + } +} + +void Metal::Renderer::SetScissorRect(const MathUtil::Rectangle& rc) +{ + g_state_tracker->SetScissor(rc); +} + +void Metal::Renderer::SetTexture(u32 index, const AbstractTexture* texture) +{ + g_state_tracker->SetTexture( + index, texture ? static_cast(texture)->GetMTLTexture() : nullptr); +} + +void Metal::Renderer::SetSamplerState(u32 index, const SamplerState& state) +{ + g_state_tracker->SetSampler(index, state); +} + +void Metal::Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + g_state_tracker->SetComputeTexture(static_cast(texture)); +} + +void Metal::Renderer::UnbindTexture(const AbstractTexture* texture) +{ + g_state_tracker->UnbindTexture(static_cast(texture)->GetMTLTexture()); +} + +void Metal::Renderer::SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) +{ + g_state_tracker->SetViewport(x, y, width, height, near_depth, far_depth); +} + +void Metal::Renderer::Draw(u32 base_vertex, u32 num_vertices) +{ + @autoreleasepool + { + g_state_tracker->Draw(base_vertex, num_vertices); + } +} + +void Metal::Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + @autoreleasepool + { + g_state_tracker->DrawIndexed(base_index, num_indices, base_vertex); + } +} + +void Metal::Renderer::DispatchComputeShader(const AbstractShader* shader, // + u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, + u32 groups_x, u32 groups_y, u32 groups_z) +{ + @autoreleasepool + { + g_state_tracker->SetPipeline(static_cast(shader)); + g_state_tracker->DispatchComputeShader(groupsize_x, groupsize_y, groupsize_z, // + groups_x, groups_y, groups_z); + } +} + +void Metal::Renderer::BindBackbuffer(const ClearColor& clear_color) +{ + @autoreleasepool + { + CheckForSurfaceChange(); + CheckForSurfaceResize(); + m_drawable = MRCRetain([m_layer nextDrawable]); + m_bb_texture->SetMTLTexture(MRCRetain([m_drawable texture])); + SetAndClearFramebuffer(m_backbuffer.get(), clear_color); + } +} + +void Metal::Renderer::PresentBackbuffer() +{ + @autoreleasepool + { + g_state_tracker->EndRenderPass(); + if (m_drawable) + { + [g_state_tracker->GetRenderCmdBuf() + addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }]; + m_bb_texture->SetMTLTexture(nullptr); + m_drawable = nullptr; + } + g_state_tracker->FlushEncoders(); + } +} + +std::unique_ptr<::BoundingBox> Metal::Renderer::CreateBoundingBox() const +{ + return std::make_unique(); +} + +void Metal::Renderer::CheckForSurfaceChange() +{ + if (!m_surface_changed.TestAndClear()) + return; + m_layer = MRCRetain(static_cast(m_new_surface_handle)); + m_new_surface_handle = nullptr; + SetupSurface(); +} + +void Metal::Renderer::CheckForSurfaceResize() +{ + if (!m_surface_resized.TestAndClear()) + return; + SetupSurface(); +} + +void Metal::Renderer::SetupSurface() +{ + CGSize size = [m_layer bounds].size; + // TODO: Update m_backbuffer_scale (need to make doing that not break everything) + const float backbuffer_scale = [m_layer contentsScale]; + size.width *= backbuffer_scale; + size.height *= backbuffer_scale; + [m_layer setDrawableSize:size]; + m_backbuffer_width = size.width; + m_backbuffer_height = size.height; + TextureConfig cfg(m_backbuffer_width, m_backbuffer_height, 1, 1, 1, m_backbuffer_format, + AbstractTextureFlag_RenderTarget); + m_bb_texture = std::make_unique(nullptr, cfg); + m_backbuffer = std::make_unique(m_bb_texture.get(), nullptr, // + m_backbuffer_width, m_backbuffer_height, 1, 1); +} diff --git a/Source/Core/VideoBackends/Metal/MTLShader.h b/Source/Core/VideoBackends/Metal/MTLShader.h new file mode 100644 index 0000000000..053d84c2b9 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLShader.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" + +namespace Metal +{ +class Shader : public AbstractShader +{ +public: + explicit Shader(ShaderStage stage, std::string msl, MRCOwned> shader) + : AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader)) + { + } + + id GetShader() const { return m_shader; } + BinaryData GetBinary() const override { return BinaryData(m_msl.begin(), m_msl.end()); } + +private: + std::string m_msl; + MRCOwned> m_shader; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h new file mode 100644 index 0000000000..d9c79774e0 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -0,0 +1,266 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonTypes.h" +#include "Common/MathUtil.h" + +#include "VideoBackends/Metal/MRCHelpers.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" + +#include "VideoCommon/RenderBase.h" + +namespace Metal +{ +class Pipeline; +class ComputePipeline; + +class StateTracker +{ +public: + enum class UploadBuffer + { + Other, + Uniform, + Vertex, + Index, + Texels, + Last = Texels + }; + + struct Map + { + id gpu_buffer; + size_t gpu_offset; + void* cpu_buffer; + }; + + enum class AlignMask : size_t + { + Other = 15, + Uniform = 255, + }; + + StateTracker(StateTracker&&) = delete; + explicit StateTracker(); + ~StateTracker(); + + Framebuffer* GetCurrentFramebuffer() { return m_current_framebuffer; }; + void SetCurrentFramebuffer(Framebuffer* framebuffer); + void BeginClearRenderPass(MTLClearColor color, float depth); + void BeginRenderPass(MTLLoadAction load_action); + void BeginRenderPass(MTLRenderPassDescriptor* descriptor); + void BeginComputePass(); + MTLRenderPassDescriptor* GetRenderPassDescriptor(Framebuffer* framebuffer, + MTLLoadAction load_action); + + void EndRenderPass(); + void FlushEncoders(); + void WaitForFlushedEncoders(); + bool HasUnflushedData() { return static_cast(m_current_render_cmdbuf); } + bool GPUBusy() + { + return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire); + } + void ReloadSamplers(); + + void SetPipeline(const Pipeline* pipe); + void SetPipeline(const ComputePipeline* pipe); + void SetScissor(const MathUtil::Rectangle& rect); + void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth); + void SetTexture(u32 idx, id texture); + void SetSampler(u32 idx, const SamplerState& sampler); + void SetComputeTexture(const Texture* texture); + void InvalidateUniforms(bool vertex, bool fragment); + void SetUtilityUniform(const void* buffer, size_t size); + void SetTexelBuffer(id buffer, u32 offset0, u32 offset1); + void SetVerticesAndIndices(Map vertices, Map indices); + void SetBBoxBuffer(id bbox, id upload, id download); + void SetVertexBufferNow(u32 idx, id buffer, u32 offset); + void SetFragmentBufferNow(u32 idx, id buffer, u32 offset); + /// Use around utility draws that are commonly used immediately before gx draws to the same buffer + void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; } + void UnbindTexture(id texture); + + void Draw(u32 base_vertex, u32 num_vertices); + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex); + void DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x, + u32 groups_y, u32 groups_z); + void ResolveTexture(id src, id dst, u32 layer, u32 level); + + size_t Align(size_t amt, AlignMask align) + { + return (amt + static_cast(align)) & ~static_cast(align); + } + Map AllocateForTextureUpload(size_t amt); + Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align) + { + Preallocate(buffer_idx, amt); + return CommitPreallocation(buffer_idx, amt, align); + } + void* Preallocate(UploadBuffer buffer_idx, size_t amt); + /// Must follow a call to Preallocate where amt is >= to the one provided here + Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align) + { + DEBUG_ASSERT((m_upload_buffers[static_cast(buffer_idx)].usage.Pos() & + static_cast(align)) == 0); + return CommitPreallocation(buffer_idx, Align(amt, align)); + } + id GetUploadEncoder(); + id GetTextureUploadEncoder(); + id GetRenderCmdBuf(); + +private: + class UsageTracker + { + struct UsageEntry + { + u64 drawno; + size_t pos; + }; + std::vector m_usage; + size_t m_size = 0; + size_t m_pos = 0; + + public: + size_t Size() { return m_size; } + size_t Pos() { return m_pos; } + bool PrepareForAllocation(u64 last_draw, size_t amt); + size_t Allocate(u64 current_draw, size_t amt); + void Reset(size_t new_size); + }; + + struct CPUBuffer + { + UsageTracker usage; + MRCOwned> mtlbuffer; + void* buffer = nullptr; + }; + + struct BufferPair + { + UsageTracker usage; + MRCOwned> cpubuffer; + MRCOwned> gpubuffer; + void* buffer = nullptr; + size_t last_upload = 0; + }; + + struct Backref; + + std::shared_ptr m_backref; + MRCOwned> m_fence; + MRCOwned> m_upload_cmdbuf; + MRCOwned> m_upload_encoder; + MRCOwned> m_texture_upload_cmdbuf; + MRCOwned> m_texture_upload_encoder; + MRCOwned> m_current_render_cmdbuf; + MRCOwned> m_last_render_cmdbuf; + MRCOwned> m_current_render_encoder; + MRCOwned> m_current_compute_encoder; + MRCOwned m_render_pass_desc[3]; + MRCOwned m_resolve_pass_desc; + Framebuffer* m_current_framebuffer; + CPUBuffer m_texture_upload_buffer; + BufferPair m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; + u64 m_current_draw = 1; + std::atomic m_last_finished_draw{0}; + + MRCOwned> m_dummy_texture; + + // MARK: State + u8 m_dirty_textures; + u8 m_dirty_samplers; + union Flags + { + struct + { + // clang-format off + bool has_gx_vs_uniform : 1; + bool has_gx_ps_uniform : 1; + bool has_utility_vs_uniform : 1; + bool has_utility_ps_uniform : 1; + bool has_compute_texture : 1; + bool has_pipeline : 1; + bool has_scissor : 1; + bool has_viewport : 1; + bool has_vertices : 1; + bool has_texel_buffer : 1; + bool bbox_fence : 1; + bool should_apply_label : 1; + // clang-format on + }; + u16 bits = 0; + void NewEncoder() + { + Flags reset_mask; + // Set the flags you *don't* want to reset + reset_mask.should_apply_label = true; + bits &= reset_mask.bits; + } + } m_flags; + + /// Things that represent the state of the encoder + struct Current + { + NSString* label; + id pipeline; + std::array, 2> vertex_buffers; + std::array, 2> fragment_buffers; + u32 width; + u32 height; + MathUtil::Rectangle scissor_rect; + Util::Viewport viewport; + MTLDepthClipMode depth_clip_mode; + MTLCullMode cull_mode; + DepthStencilSelector depth_stencil; + } m_current; + + /// Things that represent what we'd *like* to have on the encoder for the next draw + struct State + { + MathUtil::Rectangle scissor_rect; + Util::Viewport viewport; + const Pipeline* render_pipeline = nullptr; + const ComputePipeline* compute_pipeline = nullptr; + std::array, 8> textures = {}; + std::array, 8> samplers = {}; + std::array sampler_min_lod; + std::array sampler_max_lod; + std::array sampler_states; + const Texture* compute_texture = nullptr; + std::unique_ptr utility_uniform; + u32 utility_uniform_size = 0; + u32 utility_uniform_capacity = 0; + id bbox = nullptr; + id bbox_upload_fence = nullptr; + id bbox_download_fence = nullptr; + id vertices = nullptr; + id indices = nullptr; + id texels = nullptr; + u32 vertices_offset; + u32 indices_offset; + u32 texel_buffer_offset0; + u32 texel_buffer_offset1; + } m_state; + + void SetSamplerForce(u32 idx, const SamplerState& sampler); + void Sync(BufferPair& buffer); + Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); + void CheckViewport(); + void CheckScissor(); + void PrepareRender(); + void PrepareCompute(); +}; + +extern std::unique_ptr g_state_tracker; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm new file mode 100644 index 0000000000..ea2c8cd328 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -0,0 +1,849 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLStateTracker.h" + +#include +#include + +#include "Common/Assert.h" +#include "Common/BitUtils.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPipeline.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" + +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" + +std::unique_ptr Metal::g_state_tracker; + +struct Metal::StateTracker::Backref +{ + std::mutex mtx; + StateTracker* state_tracker; + explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {} +}; + +static NSString* GetName(Metal::StateTracker::UploadBuffer buffer) +{ + // clang-format off + switch (buffer) + { + case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; + case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; + case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; + case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; + case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; + } + // clang-format on +} + +// MARK: - UsageTracker + +bool Metal::StateTracker::UsageTracker::PrepareForAllocation(u64 last_draw, size_t amt) +{ + auto removeme = std::find_if(m_usage.begin(), m_usage.end(), + [last_draw](UsageEntry usage) { return usage.drawno > last_draw; }); + if (removeme != m_usage.begin()) + m_usage.erase(m_usage.begin(), removeme); + + bool still_in_use = false; + const bool needs_wrap = m_pos + amt > m_size; + if (!m_usage.empty()) + { + size_t used = m_usage.front().pos; + if (needs_wrap) + still_in_use = used >= m_pos || used < amt; + else + still_in_use = used >= m_pos && used < m_pos + amt; + } + if (needs_wrap) + m_pos = 0; + + return still_in_use || amt > m_size; +} + +size_t Metal::StateTracker::UsageTracker::Allocate(u64 current_draw, size_t amt) +{ + if (m_usage.empty() || m_usage.back().drawno != current_draw) + m_usage.push_back({current_draw, m_pos}); + size_t ret = m_pos; + m_pos += amt; + return ret; +} + +void Metal::StateTracker::UsageTracker::Reset(size_t new_size) +{ + m_usage.clear(); + m_size = new_size; + m_pos = 0; +} + +// MARK: - StateTracker + +Metal::StateTracker::StateTracker() : m_backref(std::make_shared(this)) +{ + m_flags.should_apply_label = true; + m_fence = MRCTransfer([g_device newFence]); + for (MRCOwned& rpdesc : m_render_pass_desc) + { + rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); + [[rpdesc depthAttachment] setStoreAction:MTLStoreActionStore]; + [[rpdesc stencilAttachment] setStoreAction:MTLStoreActionStore]; + } + m_resolve_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]); + auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0]; + [color0 setLoadAction:MTLLoadActionLoad]; + [color0 setStoreAction:MTLStoreActionMultisampleResolve]; + MTLTextureDescriptor* texdesc = + [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:1 + height:1 + mipmapped:NO]; + [texdesc setTextureType:MTLTextureType2DArray]; + [texdesc setUsage:MTLTextureUsageShaderRead]; + [texdesc setStorageMode:MTLStorageModePrivate]; + m_dummy_texture = MRCTransfer([g_device newTextureWithDescriptor:texdesc]); + [m_dummy_texture setLabel:@"Dummy Texture"]; + for (size_t i = 0; i < std::size(m_state.samplers); ++i) + { + SetSamplerForce(i, RenderState::GetLinearSamplerState()); + SetTexture(i, m_dummy_texture); + } +} + +Metal::StateTracker::~StateTracker() +{ + FlushEncoders(); + std::lock_guard lock(m_backref->mtx); + m_backref->state_tracker = nullptr; +} + +// MARK: BufferPair Ops + +Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt) +{ + amt = (amt + 15) & ~15ull; + CPUBuffer& buffer = m_texture_upload_buffer; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + if (__builtin_expect(needs_new, false)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + newsize *= 2; + MTLResourceOptions options = + MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"]; + ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.mtlbuffer contents]; + buffer.usage.Reset(newsize); + } + + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + + Map ret = {buffer.mtlbuffer, pos, reinterpret_cast(buffer.buffer) + pos}; + DEBUG_ASSERT(pos <= buffer.usage.Size() && + "Previous code should have guaranteed there was enough space"); + return ret; +} + +void* Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) +{ + BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + size_t base_pos = buffer.usage.Pos(); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + bool needs_upload = needs_new || buffer.usage.Pos() == 0; + if (!g_features.unified_memory && needs_upload) + { + if (base_pos != buffer.last_upload) + { + id encoder = GetUploadEncoder(); + [encoder copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:base_pos - buffer.last_upload]; + } + buffer.last_upload = 0; + } + if (__builtin_expect(needs_new, false)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + newsize *= 2; + MTLResourceOptions options = + MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.cpubuffer setLabel:GetName(buffer_idx)]; + ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.cpubuffer contents]; + buffer.usage.Reset(newsize); + if (!g_features.unified_memory) + { + options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked; + buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.gpubuffer setLabel:GetName(buffer_idx)]; + ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + } + } + return reinterpret_cast(buffer.buffer) + buffer.usage.Pos(); +} + +Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, + size_t amt) +{ + BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + Map ret = {nil, pos, reinterpret_cast(buffer.buffer) + pos}; + ret.gpu_buffer = g_features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer; + DEBUG_ASSERT(pos <= buffer.usage.Size() && + "Previous code should have guaranteed there was enough space"); + return ret; +} + +void Metal::StateTracker::Sync(BufferPair& buffer) +{ + if (g_features.unified_memory || buffer.usage.Pos() == buffer.last_upload) + return; + + id encoder = GetUploadEncoder(); + [encoder copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:buffer.usage.Pos() - buffer.last_upload]; + buffer.last_upload = buffer.usage.Pos(); +} + +// MARK: Render Pass / Encoder Management + +id Metal::StateTracker::GetUploadEncoder() +{ + if (!m_upload_cmdbuf) + { + @autoreleasepool + { + m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_upload_cmdbuf setLabel:@"Vertex Upload"]; + m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]); + [m_upload_encoder setLabel:@"Vertex Upload"]; + } + } + return m_upload_encoder; +} + +id Metal::StateTracker::GetTextureUploadEncoder() +{ + if (!m_texture_upload_cmdbuf) + { + @autoreleasepool + { + m_texture_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_texture_upload_cmdbuf setLabel:@"Texture Upload"]; + m_texture_upload_encoder = MRCRetain([m_texture_upload_cmdbuf blitCommandEncoder]); + [m_texture_upload_encoder setLabel:@"Texture Upload"]; + } + } + return m_texture_upload_encoder; +} + +id Metal::StateTracker::GetRenderCmdBuf() +{ + if (!m_current_render_cmdbuf) + { + @autoreleasepool + { + m_current_render_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_current_render_cmdbuf setLabel:@"Draw"]; + } + } + return m_current_render_cmdbuf; +} + +void Metal::StateTracker::SetCurrentFramebuffer(Framebuffer* framebuffer) +{ + if (framebuffer == m_current_framebuffer) + return; + EndRenderPass(); + m_current_framebuffer = framebuffer; +} + +MTLRenderPassDescriptor* Metal::StateTracker::GetRenderPassDescriptor(Framebuffer* framebuffer, + MTLLoadAction load_action) +{ + const AbstractTextureFormat depth_fmt = framebuffer->GetDepthFormat(); + MTLRenderPassDescriptor* desc; + if (depth_fmt == AbstractTextureFormat::Undefined) + desc = m_render_pass_desc[0]; + else if (!Util::HasStencil(depth_fmt)) + desc = m_render_pass_desc[1]; + else + desc = m_render_pass_desc[2]; + desc.colorAttachments[0].texture = framebuffer->GetColor(); + desc.colorAttachments[0].loadAction = load_action; + if (depth_fmt != AbstractTextureFormat::Undefined) + { + desc.depthAttachment.texture = framebuffer->GetDepth(); + desc.depthAttachment.loadAction = load_action; + if (Util::HasStencil(depth_fmt)) + { + desc.stencilAttachment.texture = framebuffer->GetDepth(); + desc.stencilAttachment.loadAction = load_action; + } + } + return desc; +} + +void Metal::StateTracker::BeginClearRenderPass(MTLClearColor color, float depth) +{ + Framebuffer* framebuffer = m_current_framebuffer; + MTLRenderPassDescriptor* desc = GetRenderPassDescriptor(framebuffer, MTLLoadActionClear); + desc.colorAttachments[0].clearColor = color; + if (framebuffer->GetDepthFormat() != AbstractTextureFormat::Undefined) + { + desc.depthAttachment.clearDepth = depth; + if (Util::HasStencil(framebuffer->GetDepthFormat())) + desc.stencilAttachment.clearStencil = 0; + } + BeginRenderPass(desc); +} + +void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action) +{ + if (m_current_render_encoder) + return; + BeginRenderPass(GetRenderPassDescriptor(m_current_framebuffer, load_action)); +} + +void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) +{ + EndRenderPass(); + m_current_render_encoder = + MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); + if (!g_features.unified_memory) + [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex]; + AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); + if (!attachment) + attachment = m_current_framebuffer->GetDepthAttachment(); + static_assert(std::is_trivially_copyable::value, + "Make sure we can memset this"); + memset(&m_current, 0, sizeof(m_current)); + m_current.width = attachment->GetWidth(); + m_current.height = attachment->GetHeight(); + m_current.scissor_rect = MathUtil::Rectangle(0, 0, m_current.width, m_current.height); + m_current.viewport = { + 0.f, 0.f, static_cast(m_current.width), static_cast(m_current.height), + 0.f, 1.f}; + m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always); + m_current.depth_clip_mode = MTLDepthClipModeClip; + m_current.cull_mode = MTLCullModeNone; + m_flags.NewEncoder(); + m_dirty_samplers = 0xff; + m_dirty_textures = 0xff; + CheckScissor(); + CheckViewport(); + ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!"); +} + +void Metal::StateTracker::BeginComputePass() +{ + EndRenderPass(); + m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]); + [m_current_compute_encoder setLabel:@"Compute"]; + if (!g_features.unified_memory) + [m_current_compute_encoder waitForFence:m_fence]; + m_flags.NewEncoder(); + m_dirty_samplers = 0xff; + m_dirty_textures = 0xff; +} + +void Metal::StateTracker::EndRenderPass() +{ + if (m_current_render_encoder) + { + if (m_flags.bbox_fence && m_state.bbox_download_fence) + [m_current_render_encoder updateFence:m_state.bbox_download_fence + afterStages:MTLRenderStageFragment]; + [m_current_render_encoder endEncoding]; + m_current_render_encoder = nullptr; + } + if (m_current_compute_encoder) + { + [m_current_compute_encoder endEncoding]; + m_current_compute_encoder = nullptr; + } +} + +void Metal::StateTracker::FlushEncoders() +{ + if (!m_current_render_cmdbuf) + return; + EndRenderPass(); + for (int i = 0; i <= static_cast(UploadBuffer::Last); ++i) + Sync(m_upload_buffers[i]); + if (g_features.unified_memory) + { + ASSERT(!m_upload_cmdbuf && "Should never be used!"); + } + else if (m_upload_cmdbuf) + { + [m_upload_encoder updateFence:m_fence]; + [m_upload_encoder endEncoding]; + [m_upload_cmdbuf commit]; + m_upload_encoder = nullptr; + m_upload_cmdbuf = nullptr; + } + if (m_texture_upload_cmdbuf) + { + [m_texture_upload_encoder endEncoding]; + [m_texture_upload_cmdbuf commit]; + m_texture_upload_encoder = nullptr; + m_texture_upload_cmdbuf = nullptr; + } + [m_current_render_cmdbuf + addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) { + std::lock_guard guard(backref->mtx); + if (StateTracker* tracker = backref->state_tracker) + { + // We can do the update non-atomically because we only ever update under the lock + u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed)); + tracker->m_last_finished_draw.store(newval, std::memory_order_release); + } + }]; + [m_current_render_cmdbuf commit]; + m_last_render_cmdbuf = std::move(m_current_render_cmdbuf); + m_current_render_cmdbuf = nullptr; + m_current_draw++; +} + +void Metal::StateTracker::WaitForFlushedEncoders() +{ + [m_last_render_cmdbuf waitUntilCompleted]; +} + +void Metal::StateTracker::ReloadSamplers() +{ + for (size_t i = 0; i < std::size(m_state.samplers); ++i) + m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]); +} + +// MARK: State Setters + +void Metal::StateTracker::SetPipeline(const Pipeline* pipe) +{ + if (pipe != m_state.render_pipeline) + { + m_state.render_pipeline = pipe; + m_flags.has_pipeline = false; + } +} + +void Metal::StateTracker::SetPipeline(const ComputePipeline* pipe) +{ + if (pipe != m_state.compute_pipeline) + { + m_state.compute_pipeline = pipe; + m_flags.has_pipeline = false; + } +} + +void Metal::StateTracker::SetScissor(const MathUtil::Rectangle& rect) +{ + m_state.scissor_rect = rect; + CheckScissor(); +} + +void Metal::StateTracker::CheckScissor() +{ + auto clipped = m_state.scissor_rect; + clipped.ClampUL(0, 0, m_current.width, m_current.height); + m_flags.has_scissor = clipped == m_current.scissor_rect; +} + +void Metal::StateTracker::SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) +{ + m_state.viewport = {x, y, width, height, near_depth, far_depth}; + CheckViewport(); +} + +void Metal::StateTracker::CheckViewport() +{ + m_flags.has_viewport = + 0 == memcmp(&m_state.viewport, &m_current.viewport, sizeof(m_current.viewport)); +} + +void Metal::StateTracker::SetTexture(u32 idx, id texture) +{ + ASSERT(idx < std::size(m_state.textures)); + if (!texture) + texture = m_dummy_texture; + if (m_state.textures[idx] != texture) + { + m_state.textures[idx] = texture; + m_dirty_textures |= 1 << idx; + } +} + +void Metal::StateTracker::SetSamplerForce(u32 idx, const SamplerState& sampler) +{ + m_state.samplers[idx] = g_object_cache->GetSampler(sampler); + m_state.sampler_min_lod[idx] = sampler.tm1.min_lod; + m_state.sampler_max_lod[idx] = sampler.tm1.max_lod; + m_state.sampler_states[idx] = sampler; + m_dirty_samplers |= 1 << idx; +} + +void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler) +{ + ASSERT(idx < std::size(m_state.samplers)); + if (m_state.sampler_states[idx] != sampler) + SetSamplerForce(idx, sampler); +} + +void Metal::StateTracker::SetComputeTexture(const Texture* texture) +{ + if (m_state.compute_texture != texture) + { + m_state.compute_texture = texture; + m_flags.has_compute_texture = false; + } +} + +void Metal::StateTracker::UnbindTexture(id texture) +{ + for (size_t i = 0; i < std::size(m_state.textures); ++i) + { + if (m_state.textures[i] == texture) + { + m_state.textures[i] = m_dummy_texture; + m_dirty_textures |= 1 << i; + } + } +} + +void Metal::StateTracker::InvalidateUniforms(bool vertex, bool fragment) +{ + m_flags.has_gx_vs_uniform &= !vertex; + m_flags.has_gx_ps_uniform &= !fragment; +} + +void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size) +{ + if (m_state.utility_uniform_capacity < size) + { + m_state.utility_uniform = std::unique_ptr(new u8[size]); + m_state.utility_uniform_capacity = size; + } + m_state.utility_uniform_size = size; + memcpy(m_state.utility_uniform.get(), buffer, size); + m_flags.has_utility_vs_uniform = false; + m_flags.has_utility_ps_uniform = false; +} + +void Metal::StateTracker::SetTexelBuffer(id buffer, u32 offset0, u32 offset1) +{ + m_state.texels = buffer; + m_state.texel_buffer_offset0 = offset0; + m_state.texel_buffer_offset1 = offset1; + m_flags.has_texel_buffer = false; +} + +void Metal::StateTracker::SetVerticesAndIndices(Map vertices, Map indices) +{ + m_state.vertices = vertices.gpu_buffer; + m_state.indices = indices.gpu_buffer; + m_state.vertices_offset = vertices.gpu_offset; + m_state.indices_offset = indices.gpu_offset; + m_flags.has_vertices = false; +} + +void Metal::StateTracker::SetBBoxBuffer(id bbox, id upload, + id download) +{ + m_state.bbox = bbox; + m_state.bbox_upload_fence = upload; + m_state.bbox_download_fence = download; +} + +void Metal::StateTracker::SetVertexBufferNow(u32 idx, id buffer, u32 offset) +{ + if (idx < std::size(m_current.vertex_buffers) && m_current.vertex_buffers[idx] == buffer) + { + [m_current_render_encoder setVertexBufferOffset:offset atIndex:idx]; + } + else + { + [m_current_render_encoder setVertexBuffer:buffer offset:offset atIndex:idx]; + m_current.vertex_buffers[idx] = buffer; + } +} + +void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id buffer, u32 offset) +{ + if (idx < std::size(m_current.fragment_buffers) && m_current.fragment_buffers[idx] == buffer) + { + [m_current_render_encoder setFragmentBufferOffset:offset atIndex:idx]; + } + else + { + [m_current_render_encoder setFragmentBuffer:buffer offset:offset atIndex:idx]; + m_current.fragment_buffers[idx] = buffer; + } +} + +// MARK: Render + +// clang-format off +static constexpr NSString* LABEL_GX = @"GX Draw"; +static constexpr NSString* LABEL_UTIL = @"Utility Draw"; +// clang-format on + +static NSRange RangeOfBits(u32 value) +{ + ASSERT(value && "Value must be nonzero"); + u32 low = Common::CountTrailingZeros(value); + u32 high = 31 - Common::CountLeadingZeros(value); + return NSMakeRange(low, high + 1 - low); +} + +void Metal::StateTracker::PrepareRender() +{ + if (!m_current_render_encoder) + BeginRenderPass(MTLLoadActionLoad); + id enc = m_current_render_encoder; + const Pipeline* pipe = m_state.render_pipeline; + bool is_gx = pipe->Usage() == AbstractPipelineUsage::GX; + NSString* label = is_gx ? LABEL_GX : LABEL_UTIL; + if (m_flags.should_apply_label && m_current.label != label) + { + m_current.label = label; + [m_current_render_encoder setLabel:label]; + } + if (!m_flags.has_pipeline) + { + m_flags.has_pipeline = true; + if (pipe->Get() != m_current.pipeline) + { + m_current.pipeline = pipe->Get(); + [enc setRenderPipelineState:pipe->Get()]; + } + if (pipe->Cull() != m_current.cull_mode) + { + m_current.cull_mode = pipe->Cull(); + [enc setCullMode:pipe->Cull()]; + } + if (pipe->DepthStencil() != m_current.depth_stencil) + { + m_current.depth_stencil = pipe->DepthStencil(); + [enc setDepthStencilState:g_object_cache->GetDepthStencil(m_current.depth_stencil)]; + } + MTLDepthClipMode clip = is_gx && g_ActiveConfig.backend_info.bSupportsDepthClamp ? + MTLDepthClipModeClamp : + MTLDepthClipModeClip; + if (clip != m_current.depth_clip_mode) + { + m_current.depth_clip_mode = clip; + [enc setDepthClipMode:clip]; + } + if (is_gx && m_state.bbox_upload_fence && !m_flags.bbox_fence && pipe->UsesFragmentBuffer(2)) + { + m_flags.bbox_fence = true; + [enc waitForFence:m_state.bbox_upload_fence beforeStages:MTLRenderStageFragment]; + [enc setFragmentBuffer:m_state.bbox offset:0 atIndex:2]; + } + } + if (!m_flags.has_viewport) + { + m_flags.has_viewport = true; + m_current.viewport = m_state.viewport; + MTLViewport metal; + metal.originX = m_state.viewport.x; + metal.originY = m_state.viewport.y; + metal.width = m_state.viewport.width; + metal.height = m_state.viewport.height; + metal.znear = m_state.viewport.near_depth; + metal.zfar = m_state.viewport.far_depth; + [enc setViewport:metal]; + } + if (!m_flags.has_scissor) + { + m_flags.has_scissor = true; + m_current.scissor_rect = m_state.scissor_rect; + m_current.scissor_rect.ClampUL(0, 0, m_current.width, m_current.height); + MTLScissorRect metal; + metal.x = m_current.scissor_rect.left; + metal.y = m_current.scissor_rect.top; + metal.width = m_current.scissor_rect.right - m_current.scissor_rect.left; + metal.height = m_current.scissor_rect.bottom - m_current.scissor_rect.top; + [enc setScissorRect:metal]; + } + if (!m_flags.has_vertices && pipe->UsesVertexBuffer(0)) + { + m_flags.has_vertices = true; + if (m_state.vertices) + SetVertexBufferNow(0, m_state.vertices, m_state.vertices_offset); + } + if (u8 dirty = m_dirty_textures & pipe->GetTextures()) + { + m_dirty_textures &= ~pipe->GetTextures(); + NSRange range = RangeOfBits(dirty); + [enc setFragmentTextures:&m_state.textures[range.location] withRange:range]; + } + if (u8 dirty = m_dirty_samplers & pipe->GetSamplers()) + { + m_dirty_samplers &= ~pipe->GetSamplers(); + NSRange range = RangeOfBits(dirty); + [enc setFragmentSamplerStates:&m_state.samplers[range.location] + lodMinClamps:m_state.sampler_min_lod.data() + lodMaxClamps:m_state.sampler_max_lod.data() + withRange:range]; + } + if (is_gx) + { + // GX draw + if (!m_flags.has_gx_vs_uniform) + { + m_flags.has_gx_vs_uniform = true; + Map map = Allocate(UploadBuffer::Uniform, sizeof(VertexShaderConstants), AlignMask::Uniform); + memcpy(map.cpu_buffer, &VertexShaderManager::constants, sizeof(VertexShaderConstants)); + SetVertexBufferNow(1, map.gpu_buffer, map.gpu_offset); + if (pipe->UsesFragmentBuffer(1)) + SetFragmentBufferNow(1, map.gpu_buffer, map.gpu_offset); + ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, + Align(sizeof(VertexShaderConstants), AlignMask::Uniform)); + } + if (!m_flags.has_gx_ps_uniform) + { + m_flags.has_gx_ps_uniform = true; + Map map = Allocate(UploadBuffer::Uniform, sizeof(PixelShaderConstants), AlignMask::Uniform); + memcpy(map.cpu_buffer, &PixelShaderManager::constants, sizeof(PixelShaderConstants)); + SetFragmentBufferNow(0, map.gpu_buffer, map.gpu_offset); + ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, + Align(sizeof(PixelShaderConstants), AlignMask::Uniform)); + } + } + else + { + // Utility draw + if (!m_flags.has_utility_vs_uniform && pipe->UsesVertexBuffer(1)) + { + m_flags.has_utility_vs_uniform = true; + m_flags.has_gx_vs_uniform = false; + [enc setVertexBytes:m_state.utility_uniform.get() + length:m_state.utility_uniform_size + atIndex:1]; + } + if (!m_flags.has_utility_ps_uniform && pipe->UsesFragmentBuffer(0)) + { + m_flags.has_utility_ps_uniform = true; + m_flags.has_gx_ps_uniform = false; + [enc setFragmentBytes:m_state.utility_uniform.get() + length:m_state.utility_uniform_size + atIndex:0]; + } + if (!m_flags.has_texel_buffer && pipe->UsesFragmentBuffer(2)) + { + m_flags.has_texel_buffer = true; + SetFragmentBufferNow(2, m_state.texels, m_state.texel_buffer_offset0); + } + } +} + +void Metal::StateTracker::PrepareCompute() +{ + if (!m_current_compute_encoder) + BeginComputePass(); + id enc = m_current_compute_encoder; + const ComputePipeline* pipe = m_state.compute_pipeline; + if (!m_flags.has_pipeline) + { + m_flags.has_pipeline = true; + [enc setComputePipelineState:pipe->GetComputePipeline()]; + } + if (!m_flags.has_compute_texture && pipe->UsesTexture(0)) + { + m_flags.has_compute_texture = true; + [enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0]; + } + // Compute and render can't happen at the same time, so just reuse one of the flags + if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0)) + { + m_flags.has_utility_vs_uniform = true; + [enc setBytes:m_state.utility_uniform.get() length:m_state.utility_uniform_size atIndex:0]; + } + if (!m_flags.has_texel_buffer && pipe->UsesBuffer(2)) + { + m_flags.has_texel_buffer = true; + [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset0 atIndex:2]; + if (pipe->UsesBuffer(3)) + [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset1 atIndex:3]; + } +} + +void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices) +{ + PrepareRender(); + [m_current_render_encoder drawPrimitives:m_state.render_pipeline->Prim() + vertexStart:base_vertex + vertexCount:num_vertices]; +} + +void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + PrepareRender(); + if (!base_vertex) + { + [m_current_render_encoder + drawIndexedPrimitives:m_state.render_pipeline->Prim() + indexCount:num_indices + indexType:MTLIndexTypeUInt16 + indexBuffer:m_state.indices + indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16)]; + } + else + { + [m_current_render_encoder + drawIndexedPrimitives:m_state.render_pipeline->Prim() + indexCount:num_indices + indexType:MTLIndexTypeUInt16 + indexBuffer:m_state.indices + indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16) + instanceCount:1 + baseVertex:base_vertex + baseInstance:0]; + } +} + +void Metal::StateTracker::DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, + u32 groups_x, u32 groups_y, u32 groups_z) +{ + PrepareCompute(); + [m_current_compute_encoder + dispatchThreadgroups:MTLSizeMake(groups_x, groups_y, groups_z) + threadsPerThreadgroup:MTLSizeMake(groupsize_x, groupsize_y, groupsize_z)]; +} + +void Metal::StateTracker::ResolveTexture(id src, id dst, u32 layer, + u32 level) +{ + EndRenderPass(); + auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0]; + [color0 setTexture:src]; + [color0 setResolveTexture:dst]; + [color0 setResolveSlice:layer]; + [color0 setResolveLevel:level]; + id enc = + [GetRenderCmdBuf() renderCommandEncoderWithDescriptor:m_resolve_pass_desc]; + [enc setLabel:@"Multisample Resolve"]; + [enc endEncoding]; +} diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.h b/Source/Core/VideoBackends/Metal/MTLTexture.h new file mode 100644 index 0000000000..7c01f1de4b --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLTexture.h @@ -0,0 +1,77 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class Texture final : public AbstractTexture +{ +public: + explicit Texture(MRCOwned> tex, const TextureConfig& config); + ~Texture(); + + void CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) override; + void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, + u32 layer, u32 level) override; + void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, + size_t buffer_size) override; + + id GetMTLTexture() const { return m_tex; } + void SetMTLTexture(MRCOwned> tex) { m_tex = std::move(tex); } + +private: + MRCOwned> m_tex; +}; + +class StagingTexture final : public AbstractStagingTexture +{ +public: + StagingTexture(MRCOwned> buffer, StagingTextureType type, + const TextureConfig& config); + ~StagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + +private: + MRCOwned> m_buffer; + MRCOwned> m_wait_buffer; +}; + +class Framebuffer final : public AbstractFramebuffer +{ +public: + Framebuffer(AbstractTexture* color, AbstractTexture* depth, u32 width, u32 height, u32 layers, + u32 samples); + ~Framebuffer(); + + id GetColor() const + { + return static_cast(GetColorAttachment())->GetMTLTexture(); + } + id GetDepth() const + { + return static_cast(GetDepthAttachment())->GetMTLTexture(); + } +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.mm b/Source/Core/VideoBackends/Metal/MTLTexture.mm new file mode 100644 index 0000000000..67c114caa7 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLTexture.mm @@ -0,0 +1,179 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLTexture.h" + +#include "Common/Align.h" +#include "Common/Assert.h" + +#include "VideoBackends/Metal/MTLStateTracker.h" + +Metal::Texture::Texture(MRCOwned> tex, const TextureConfig& config) + : AbstractTexture(config), m_tex(std::move(tex)) +{ +} + +Metal::Texture::~Texture() +{ + if (g_state_tracker) + g_state_tracker->UnbindTexture(m_tex); +} + +void Metal::Texture::CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) +{ + g_state_tracker->EndRenderPass(); + id msrc = static_cast(src)->GetMTLTexture(); + id blit = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + MTLSize size = MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + [blit setLabel:@"Texture Copy"]; + [blit copyFromTexture:msrc + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0) + sourceSize:size + toTexture:m_tex + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)]; + [blit endEncoding]; +} + +void Metal::Texture::ResolveFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& rect, u32 layer, u32 level) +{ + ASSERT(rect == MathUtil::Rectangle(0, 0, src->GetWidth(), src->GetHeight())); + id src_tex = static_cast(src)->GetMTLTexture(); + g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level); +} + +void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, // + const u8* buffer, size_t buffer_size) +{ + @autoreleasepool + { + const u32 block_size = GetBlockSizeForFormat(GetFormat()); + const u32 num_rows = Common::AlignUp(height, block_size) / block_size; + const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); + const u32 upload_size = source_pitch * num_rows; + StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size); + memcpy(map.cpu_buffer, buffer, upload_size); + id encoder = g_state_tracker->GetTextureUploadEncoder(); + [encoder copyFromBuffer:map.gpu_buffer + sourceOffset:map.gpu_offset + sourceBytesPerRow:source_pitch + sourceBytesPerImage:upload_size + sourceSize:MTLSizeMake(width, height, 1) + toTexture:m_tex + destinationSlice:0 + destinationLevel:level + destinationOrigin:MTLOriginMake(0, 0, 0)]; + } +} + +Metal::StagingTexture::StagingTexture(MRCOwned> buffer, StagingTextureType type, + const TextureConfig& config) + : AbstractStagingTexture(type, config), m_buffer(std::move(buffer)) +{ + m_map_pointer = static_cast([m_buffer contents]); + m_map_stride = config.GetStride(); +} + +Metal::StagingTexture::~StagingTexture() = default; + +void Metal::StagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, // + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) +{ + @autoreleasepool + { + const size_t stride = m_config.GetStride(); + const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size; + const MTLSize size = + MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + g_state_tracker->EndRenderPass(); + m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf()); + id download_encoder = [m_wait_buffer blitCommandEncoder]; + [download_encoder setLabel:@"Texture Download"]; + [download_encoder copyFromTexture:static_cast(src)->GetMTLTexture() + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0) + sourceSize:size + toBuffer:m_buffer + destinationOffset:offset + destinationBytesPerRow:stride + destinationBytesPerImage:stride * size.height]; + [download_encoder endEncoding]; + m_needs_flush = true; + } +} + +void Metal::StagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, // + AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, // + u32 dst_layer, u32 dst_level) +{ + @autoreleasepool + { + const size_t stride = m_config.GetStride(); + const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size; + const MTLSize size = + MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + g_state_tracker->EndRenderPass(); + m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf()); + id upload_encoder = [m_wait_buffer blitCommandEncoder]; + [upload_encoder setLabel:@"Texture Upload"]; + [upload_encoder copyFromBuffer:m_buffer + sourceOffset:offset + sourceBytesPerRow:stride + sourceBytesPerImage:stride * size.height + sourceSize:size + toTexture:static_cast(dst)->GetMTLTexture() + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)]; + [upload_encoder endEncoding]; + m_needs_flush = true; + } +} + +bool Metal::StagingTexture::Map() +{ + // Always mapped + return true; +} + +void Metal::StagingTexture::Unmap() +{ + // Always mapped +} + +void Metal::StagingTexture::Flush() +{ + m_needs_flush = false; + if (!m_wait_buffer) + return; + if ([m_wait_buffer status] != MTLCommandBufferStatusCompleted) + { + // Flush while we wait, since who knows how long we'll be sitting here + g_state_tracker->FlushEncoders(); + [m_wait_buffer waitUntilCompleted]; + } + m_wait_buffer = nullptr; +} + +Metal::Framebuffer::Framebuffer(AbstractTexture* color, AbstractTexture* depth, // + u32 width, u32 height, u32 layers, u32 samples) + : AbstractFramebuffer(color, depth, + color ? color->GetFormat() : AbstractTextureFormat::Undefined, // + depth ? depth->GetFormat() : AbstractTextureFormat::Undefined, // + width, height, layers, samples) +{ +} + +Metal::Framebuffer::~Framebuffer() = default; diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.h b/Source/Core/VideoBackends/Metal/MTLUtil.h new file mode 100644 index 0000000000..385f508648 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLUtil.h @@ -0,0 +1,54 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/TextureConfig.h" +#include "VideoCommon/VideoConfig.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +struct DeviceFeatures +{ + bool unified_memory; + bool subgroup_ops; +}; + +extern DeviceFeatures g_features; + +namespace Util +{ +struct Viewport +{ + float x; + float y; + float width; + float height; + float near_depth; + float far_depth; +}; + +/// Gets the list of Metal devices, ordered so the system default device is first +std::vector>> GetAdapterList(); +void PopulateBackendInfo(VideoConfig* config); +void PopulateBackendInfoAdapters(VideoConfig* config, + const std::vector>>& adapters); +void PopulateBackendInfoFeatures(VideoConfig* config, id device); + +AbstractTextureFormat ToAbstract(MTLPixelFormat format); +MTLPixelFormat FromAbstract(AbstractTextureFormat format); +static inline bool HasStencil(AbstractTextureFormat format) +{ + return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8; +} + +std::optional TranslateShaderToMSL(ShaderStage stage, std::string_view source); + +} // namespace Util +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm new file mode 100644 index 0000000000..b851a3d256 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -0,0 +1,358 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLUtil.h" + +#include +#include + +#include + +#include "Common/MsgHandler.h" + +#include "VideoCommon/Spirv.h" + +Metal::DeviceFeatures Metal::g_features; + +std::vector>> Metal::Util::GetAdapterList() +{ + std::vector>> list; + id default_dev = MTLCreateSystemDefaultDevice(); + if (default_dev) + list.push_back(MRCTransfer(default_dev)); + + auto devices = MRCTransfer(MTLCopyAllDevices()); + for (id device in devices.Get()) + { + if (device != default_dev) + list.push_back(MRCRetain(device)); + } + return list; +} + +void Metal::Util::PopulateBackendInfo(VideoConfig* config) +{ + config->backend_info.api_type = APIType::Metal; + config->backend_info.bUsesLowerLeftOrigin = false; + config->backend_info.bSupportsExclusiveFullscreen = false; + config->backend_info.bSupportsDualSourceBlend = true; + config->backend_info.bSupportsPrimitiveRestart = true; + config->backend_info.bSupportsGeometryShaders = false; + config->backend_info.bSupportsComputeShaders = true; + config->backend_info.bSupports3DVision = false; + config->backend_info.bSupportsEarlyZ = true; + config->backend_info.bSupportsBindingLayout = true; + config->backend_info.bSupportsBBox = true; + config->backend_info.bSupportsGSInstancing = false; + config->backend_info.bSupportsPostProcessing = true; + config->backend_info.bSupportsPaletteConversion = true; + config->backend_info.bSupportsClipControl = true; + config->backend_info.bSupportsSSAA = true; + config->backend_info.bSupportsFragmentStoresAndAtomics = true; + config->backend_info.bSupportsReversedDepthRange = false; + config->backend_info.bSupportsLogicOp = false; + config->backend_info.bSupportsMultithreading = false; + config->backend_info.bSupportsGPUTextureDecoding = true; + config->backend_info.bSupportsCopyToVram = true; + config->backend_info.bSupportsBitfield = true; + config->backend_info.bSupportsDynamicSamplerIndexing = true; + config->backend_info.bSupportsFramebufferFetch = false; + config->backend_info.bSupportsBackgroundCompiling = true; + config->backend_info.bSupportsLargePoints = true; + config->backend_info.bSupportsPartialDepthCopies = true; + config->backend_info.bSupportsDepthReadback = true; + config->backend_info.bSupportsShaderBinaries = false; + config->backend_info.bSupportsPipelineCacheData = false; + config->backend_info.bSupportsCoarseDerivatives = false; + config->backend_info.bSupportsTextureQueryLevels = true; + config->backend_info.bSupportsLodBiasInSampler = false; + config->backend_info.bSupportsSettingObjectNames = true; +} + +void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, + const std::vector>>& adapters) +{ + config->backend_info.Adapters.clear(); + for (id adapter : adapters) + { + config->backend_info.Adapters.push_back([[adapter name] UTF8String]); + } +} + +void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id device) +{ +#if TARGET_OS_OSX + config->backend_info.bSupportsDepthClamp = true; + config->backend_info.bSupportsST3CTextures = true; + config->backend_info.bSupportsBPTCTextures = true; +#else + bool supports_mac1 = false; + bool supports_apple4 = false; + if (@available(iOS 13, *)) + { + supports_mac1 = [device supportsFamily:MTLGPUFamilyMac1]; + supports_apple4 = [device supportsFamily:MTLGPUFamilyApple4]; + } + else + { + supports_apple4 = [device supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]; + } + config->backend_info.bSupportsDepthClamp = supports_mac1 || supports_apple4; + config->backend_info.bSupportsST3CTextures = supports_mac1; + config->backend_info.bSupportsBPTCTextures = supports_mac1; + config->backend_info.bSupportsFramebufferFetch = true; +#endif + if (char* env = getenv("MTL_UNIFIED_MEMORY")) + g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; + else if (@available(macOS 10.15, iOS 13.0, *)) + g_features.unified_memory = [device hasUnifiedMemory]; + else + g_features.unified_memory = false; + + g_features.subgroup_ops = false; + if (@available(macOS 10.15, iOS 13, *)) + { + // Requires SIMD-scoped reduction operations + g_features.subgroup_ops = + [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6]; + config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1]; + } + if ([[device name] containsString:@"AMD"]) + { + // Broken + g_features.subgroup_ops = false; + } +} + +// clang-format off + +AbstractTextureFormat Metal::Util::ToAbstract(MTLPixelFormat format) +{ + switch (format) + { + case MTLPixelFormatRGBA8Unorm: return AbstractTextureFormat::RGBA8; + case MTLPixelFormatBGRA8Unorm: return AbstractTextureFormat::BGRA8; + case MTLPixelFormatBC1_RGBA: return AbstractTextureFormat::DXT1; + case MTLPixelFormatBC2_RGBA: return AbstractTextureFormat::DXT3; + case MTLPixelFormatBC3_RGBA: return AbstractTextureFormat::DXT5; + case MTLPixelFormatBC7_RGBAUnorm: return AbstractTextureFormat::BPTC; + case MTLPixelFormatR16Unorm: return AbstractTextureFormat::R16; + case MTLPixelFormatDepth16Unorm: return AbstractTextureFormat::D16; + case MTLPixelFormatDepth24Unorm_Stencil8: return AbstractTextureFormat::D24_S8; + case MTLPixelFormatR32Float: return AbstractTextureFormat::R32F; + case MTLPixelFormatDepth32Float: return AbstractTextureFormat::D32F; + case MTLPixelFormatDepth32Float_Stencil8: return AbstractTextureFormat::D32F_S8; + default: return AbstractTextureFormat::Undefined; + } +} + +MTLPixelFormat Metal::Util::FromAbstract(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::RGBA8: return MTLPixelFormatRGBA8Unorm; + case AbstractTextureFormat::BGRA8: return MTLPixelFormatBGRA8Unorm; + case AbstractTextureFormat::DXT1: return MTLPixelFormatBC1_RGBA; + case AbstractTextureFormat::DXT3: return MTLPixelFormatBC2_RGBA; + case AbstractTextureFormat::DXT5: return MTLPixelFormatBC3_RGBA; + case AbstractTextureFormat::BPTC: return MTLPixelFormatBC7_RGBAUnorm; + case AbstractTextureFormat::R16: return MTLPixelFormatR16Unorm; + case AbstractTextureFormat::D16: return MTLPixelFormatDepth16Unorm; + case AbstractTextureFormat::D24_S8: return MTLPixelFormatDepth24Unorm_Stencil8; + case AbstractTextureFormat::R32F: return MTLPixelFormatR32Float; + case AbstractTextureFormat::D32F: return MTLPixelFormatDepth32Float; + case AbstractTextureFormat::D32F_S8: return MTLPixelFormatDepth32Float_Stencil8; + case AbstractTextureFormat::Undefined: return MTLPixelFormatInvalid; + } +} + +// clang-format on + +// MARK: Shader Translation + +static const std::string_view SHADER_HEADER = R"( +// Target GLSL 4.5. +#version 450 core +// Always available on Metal +#extension GL_EXT_shader_8bit_storage : require +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require + +#define ATTRIBUTE_LOCATION(x) layout(location = x) +#define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x) +#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) +#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) +#define SAMPLER_BINDING(x) layout(set = 1, binding = x) +#define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8)) +#define SSBO_BINDING(x) layout(std430, set = 2, binding = x) +#define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z) +#define VARYING_LOCATION(x) layout(location = x) +#define FORCE_EARLY_Z layout(early_fragment_tests) in + +// Metal framebuffer fetch helpers. +#define FB_FETCH_VALUE subpassLoad(in_ocol0) + +// hlsl to glsl function translation +#define API_METAL 1 +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 +#define frac fract +#define lerp mix + +// These were changed in Vulkan +#define gl_VertexID gl_VertexIndex +#define gl_InstanceID gl_InstanceIndex +)"; +static const std::string_view COMPUTE_SHADER_HEADER = R"( +// Target GLSL 4.5. +#version 450 core +// Always available on Metal +#extension GL_EXT_shader_8bit_storage : require +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require + +// All resources are packed into one descriptor set for compute. +#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) +#define SSBO_BINDING(x) layout(std430, set = 2, binding = x) +#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x) + +// hlsl to glsl function translation +#define API_METAL 1 +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 +#define frac fract +#define lerp mix +)"; +static const std::string_view SUBGROUP_HELPER_HEADER = R"( +#extension GL_KHR_shader_subgroup_basic : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_ballot : enable + +#define SUPPORTS_SUBGROUP_REDUCTION 1 +#define CAN_USE_SUBGROUP_REDUCTION true +#define IS_HELPER_INVOCATION gl_HelperInvocation +#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect()) +#define SUBGROUP_MIN(value) value = subgroupMin(value) +#define SUBGROUP_MAX(value) value = subgroupMax(value) +)"; + +static const std::string_view MSL_HEADER = + // We know our shader generator leaves unused variables. + "#pragma clang diagnostic ignored \"-Wunused-variable\"\n" + // These are usually when the compiler doesn't think a switch is exhaustive + "#pragma clang diagnostic ignored \"-Wreturn-type\"\n"; + +static constexpr spirv_cross::MSLResourceBinding +MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, // + u32 msl_buffer, u32 msl_texture, u32 msl_sampler) +{ + spirv_cross::MSLResourceBinding resource; + resource.stage = stage; + resource.desc_set = set; + resource.binding = binding; + resource.msl_buffer = msl_buffer; + resource.msl_texture = msl_texture; + resource.msl_sampler = msl_sampler; + return resource; +} + +std::optional Metal::Util::TranslateShaderToMSL(ShaderStage stage, + std::string_view source) +{ + std::string full_source; + + std::string_view header = stage == ShaderStage::Compute ? COMPUTE_SHADER_HEADER : SHADER_HEADER; + full_source.reserve(header.size() + SUBGROUP_HELPER_HEADER.size() + source.size()); + + full_source.append(header); + if (Metal::g_features.subgroup_ops) + full_source.append(SUBGROUP_HELPER_HEADER); + full_source.append(source); + + std::optional code; + switch (stage) + { + case ShaderStage::Vertex: + code = SPIRV::CompileVertexShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + case ShaderStage::Geometry: + PanicAlertFmt("Tried to compile geometry shader for Metal, but Metal doesn't support them!"); + break; + case ShaderStage::Pixel: + code = SPIRV::CompileFragmentShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + case ShaderStage::Compute: + code = SPIRV::CompileComputeShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + } + if (!code.has_value()) + return std::nullopt; + + // clang-format off + static const spirv_cross::MSLResourceBinding resource_bindings[] = { + MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8 + MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo + MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo + MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image + MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo + MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo + }; + + spirv_cross::CompilerMSL::Options options; +#if TARGET_OS_OSX + options.platform = spirv_cross::CompilerMSL::Options::macOS; +#elif TARGET_OS_IOS + options.platform = spirv_cross::CompilerMSL::Options::iOS; +#else + #error What platform is this? +#endif + // clang-format on + + spirv_cross::CompilerMSL compiler(std::move(*code)); + + if (@available(macOS 11, iOS 14, *)) + options.set_msl_version(2, 3); + else if (@available(macOS 10.15, iOS 13, *)) + options.set_msl_version(2, 2); + else if (@available(macOS 10.14, iOS 12, *)) + options.set_msl_version(2, 1); + else + options.set_msl_version(2, 0); + options.use_framebuffer_fetch_subpasses = true; + compiler.set_msl_options(options); + + for (auto& binding : resource_bindings) + compiler.add_msl_resource_binding(binding); + + std::string msl(MSL_HEADER); + msl += compiler.compile(); + return msl; +} diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.h b/Source/Core/VideoBackends/Metal/MTLVertexManager.h new file mode 100644 index 0000000000..29c715f1fd --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoCommon/VertexManagerBase.h" + +namespace Metal +{ +class VertexManager final : public VertexManagerBase +{ +public: + VertexManager(); + ~VertexManager() override; + + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; + +protected: + void ResetBuffer(u32 vertex_stride) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadUniforms() override; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm new file mode 100644 index 0000000000..3683e79ba6 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm @@ -0,0 +1,89 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoBackends/Metal/MTLStateTracker.h" + +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" + +Metal::VertexManager::VertexManager() +{ +} + +Metal::VertexManager::~VertexManager() = default; + +void Metal::VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ + g_state_tracker->SetUtilityUniform(uniforms, uniforms_size); +} + +bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size, + TexelBufferFormat format, u32* out_offset) +{ + *out_offset = 0; + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, data_size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, data, data_size); + g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, 0); + return true; +} + +bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size, + TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, + u32* out_palette_offset) +{ + *out_offset = 0; + *out_palette_offset = 0; + + const u32 aligned_data_size = g_state_tracker->Align(data_size, StateTracker::AlignMask::Other); + const u32 total_size = aligned_data_size + palette_size; + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, total_size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, data, data_size); + memcpy(static_cast(map.cpu_buffer) + aligned_data_size, palette_data, palette_size); + g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, + map.gpu_offset + aligned_data_size); + return true; +} + +void Metal::VertexManager::ResetBuffer(u32 vertex_stride) +{ + const u32 max_vertex_size = 65535 * vertex_stride; + void* vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, max_vertex_size); + void* index = + g_state_tracker->Preallocate(StateTracker::UploadBuffer::Index, MAXIBUFFERSIZE * sizeof(u16)); + + m_cur_buffer_pointer = m_base_buffer_pointer = static_cast(vertex); + m_end_buffer_pointer = m_base_buffer_pointer + max_vertex_size; + m_index_generator.Start(static_cast(index)); +} + +void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + const u32 vsize = num_vertices * vertex_stride; + const u32 isize = num_indices * sizeof(u16); + StateTracker::Map vmap = g_state_tracker->CommitPreallocation( + StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::Other); + StateTracker::Map imap = g_state_tracker->CommitPreallocation( + StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::Other); + + ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, vsize); + ADDSTAT(g_stats.this_frame.bytes_index_streamed, isize); + + g_state_tracker->SetVerticesAndIndices(vmap, imap); + *out_base_vertex = 0; + *out_base_index = 0; +} + +void Metal::VertexManager::UploadUniforms() +{ + g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, PixelShaderManager::dirty); + VertexShaderManager::dirty = false; + PixelShaderManager::dirty = false; +} diff --git a/Source/Core/VideoBackends/Metal/VideoBackend.h b/Source/Core/VideoBackends/Metal/VideoBackend.h new file mode 100644 index 0000000000..3a567718e2 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/VideoBackend.h @@ -0,0 +1,27 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "VideoCommon/VideoBackendBase.h" + +namespace Metal +{ +class VideoBackend : public VideoBackendBase +{ +public: + bool Initialize(const WindowSystemInfo& wsi) override; + void Shutdown() override; + + std::string GetName() const override; + std::string GetDisplayName() const override; + std::optional GetWarningMessage() const override; + + void InitBackendInfo() override; + + void PrepareWindow(WindowSystemInfo& wsi) override; + + static constexpr const char* NAME = "Metal"; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 93df1ccc29..78f9d8e176 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -903,8 +903,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) } } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { glUseProgram(static_cast(shader)->GetGLComputeProgramID()); glDispatchCompute(groups_x, groups_y, groups_z); diff --git a/Source/Core/VideoBackends/OGL/OGLRender.h b/Source/Core/VideoBackends/OGL/OGLRender.h index 02266776ab..72da03ef29 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.h +++ b/Source/Core/VideoBackends/OGL/OGLRender.h @@ -125,8 +125,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp index db5037f427..36fa325ae7 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp @@ -641,8 +641,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) base_vertex, 0); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { StateTracker::GetInstance()->SetComputeShader(static_cast(shader)); if (StateTracker::GetInstance()->BindCompute()) diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.h b/Source/Core/VideoBackends/Vulkan/VKRenderer.h index 4e69bbe577..11fc7dd96e 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.h +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.h @@ -77,8 +77,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 345578d071..6426b259b4 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -34,6 +34,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { @@ -55,6 +56,7 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: code.Write("texture(samp{}, {})", n, coords); @@ -72,6 +74,7 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords); @@ -89,6 +92,7 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { @@ -138,6 +142,7 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 403d028525..8824e6ff77 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -113,8 +113,8 @@ public: virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {} // Dispatching compute shaders with currently-bound state. - virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) + virtual void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 511643e83a..b9965421cc 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -60,6 +60,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool case APIType::D3D: filename += "D3D"; break; + case APIType::Metal: + filename += "Metal"; + break; case APIType::OpenGL: filename += "OpenGL"; break; diff --git a/Source/Core/VideoCommon/Spirv.cpp b/Source/Core/VideoCommon/Spirv.cpp index b10b4eb92b..0fc22e1731 100644 --- a/Source/Core/VideoCommon/Spirv.cpp +++ b/Source/Core/VideoCommon/Spirv.cpp @@ -55,7 +55,7 @@ CompileShaderToSPV(EShLanguage stage, APIType api_type, glslang::TShader::ForbidIncluder includer; EProfile profile = ECoreProfile; EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules); - if (api_type == APIType::Vulkan) + if (api_type == APIType::Vulkan || api_type == APIType::Metal) messages = static_cast(messages | EShMsgVulkanRules); int default_version = 450; diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 2f896027ad..07bbe0aa2e 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -2924,7 +2924,8 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con auto dispatch_groups = TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height); - g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1); + g_renderer->DispatchComputeShader(shader, info->group_size_x, info->group_size_y, 1, + dispatch_groups.first, dispatch_groups.second, 1); // Copy from decoding texture -> final texture // This is because we don't want to have to create compute view for every layer diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 1e7f2b64b5..459b12b9c8 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -519,10 +519,49 @@ UBO_BINDING(std140, 1) uniform UBO { uint u_palette_offset; }; -TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; -#ifdef HAS_PALETTE -TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#if defined(API_METAL) + +#if defined(TEXEL_BUFFER_FORMAT_R8) + SSBO_BINDING(0) readonly buffer Input { uint8_t s_input_buffer[]; }; + #define FETCH(offset) uint(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_R16) + SSBO_BINDING(0) readonly buffer Input { uint16_t s_input_buffer[]; }; + #define FETCH(offset) uint(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_RGBA8) + SSBO_BINDING(0) readonly buffer Input { u8vec4 s_input_buffer[]; }; + #define FETCH(offset) uvec4(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_R32G32) + SSBO_BINDING(0) readonly buffer Input { uvec2 s_input_buffer[]; }; + #define FETCH(offset) s_input_buffer[offset] +#else + #error No texel buffer? #endif + +#ifdef HAS_PALETTE + SSBO_BINDING(1) readonly buffer Palette { uint16_t s_palette_buffer[]; }; + #define FETCH_PALETTE(offset) uint(s_palette_buffer[offset]) +#endif + +#else + +TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; + +#if defined(TEXEL_BUFFER_FORMAT_R8) || defined(TEXEL_BUFFER_FORMAT_R16) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).r +#elif defined(TEXEL_BUFFER_FORMAT_RGBA8) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)) +#elif defined(TEXEL_BUFFER_FORMAT_R32G32) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).rg +#else + #error No texel buffer? +#endif + +#ifdef HAS_PALETTE + TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; + #define FETCH_PALETTE(offset) texelFetch(s_palette_buffer, int((offset) + u_palette_offset)).r +#endif + +#endif // defined(API_METAL) IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; #define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier(); @@ -563,7 +602,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords) { uint2 block = coords / block_size; uint2 offset = coords % block_size; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * (block_size.x * block_size.y); buffer_pos += offset.y * block_size.x; @@ -575,7 +614,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords) uint4 GetPaletteColor(uint index) { // Fetch and swap BE to LE. - uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x); + uint val = Swap16(FETCH_PALETTE(index)); uint4 color; #if defined(PALETTE_FORMAT_IA8) @@ -633,14 +672,14 @@ static const std::map s_decoding_shader_info{ // the size of the buffer elements. uint2 block = coords.xy / 8u; uint2 offset = coords.xy % 8u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 32u; buffer_pos += offset.y * 4u; buffer_pos += offset.x / 2u; // Select high nibble for odd texels, low for even. - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint i; if ((coords.x & 1u) == 0u) i = Convert4To8((val >> 4)); @@ -663,7 +702,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint i = Convert4To8((val & 0x0Fu)); uint a = Convert4To8((val >> 4)); uint4 color = uint4(i, i, i, a); @@ -681,7 +720,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint i = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint i = FETCH(buffer_pos); uint4 color = uint4(i, i, i, i); float4 norm_color = float4(color) / 255.0; @@ -697,7 +736,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks, 16 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint a = (val & 0xFFu); uint i = (val >> 8); uint4 color = uint4(i, i, i, a); @@ -714,7 +753,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); + uint val = Swap16(FETCH(buffer_pos)); uint4 color; color.x = Convert5To8(bitfieldExtract(val, 11, 5)); @@ -736,7 +775,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); + uint val = Swap16(FETCH(buffer_pos)); uint4 color; if ((val & 0x8000u) != 0u) @@ -771,7 +810,7 @@ static const std::map s_decoding_shader_info{ // for the entire block, then the GB channels afterwards. uint2 block = coords.xy / 4u; uint2 offset = coords.xy % 4u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; // Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes. buffer_pos += block.y * u_src_row_stride; @@ -780,8 +819,8 @@ static const std::map s_decoding_shader_info{ buffer_pos += offset.x; // The two GB channels follow after the block's AR channels. - uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x; - uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x; + uint val1 = FETCH(buffer_pos + 0u); + uint val2 = FETCH(buffer_pos + 16u); uint4 color; color.a = (val1 & 0xFFu); @@ -835,14 +874,14 @@ static const std::map s_decoding_shader_info{ // Calculate tiled block coordinates. uint2 tile_block_coords = block_coords / 2u; uint2 subtile_block_coords = block_coords % 2u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += tile_block_coords.y * u_src_row_stride; buffer_pos += tile_block_coords.x * 4u; buffer_pos += subtile_block_coords.y * 2u; buffer_pos += subtile_block_coords.x; // Read the entire DXT block to shared memory. - uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; + uint2 raw_data = FETCH(buffer_pos); shared_temp[block_in_group] = raw_data; } @@ -921,14 +960,14 @@ static const std::map s_decoding_shader_info{ // the size of the buffer elements. uint2 block = coords.xy / 8u; uint2 offset = coords.xy % 8u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 32u; buffer_pos += offset.y * 4u; buffer_pos += offset.x / 2u; // Select high nibble for odd texels, low for even. - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu); float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); @@ -945,7 +984,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint index = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint index = FETCH(buffer_pos); float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); } @@ -960,7 +999,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks, 16 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu; + uint index = Swap16(FETCH(buffer_pos)) & 0x3FFFu; float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); } @@ -976,8 +1015,8 @@ static const std::map s_decoding_shader_info{ DEFINE_MAIN(8, 8) { uint2 uv = gl_GlobalInvocationID.xy; - int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u)); - float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos)); + uint buffer_pos = (uv.y * u_src_row_stride) + (uv.x / 2u); + float4 yuyv = float4(FETCH(buffer_pos)); float y = (uv.x & 1u) != 0u ? yuyv.b : yuyv.r; @@ -1034,6 +1073,25 @@ std::string GenerateDecodingShader(TextureFormat format, std::optionalbuffer_format) + { + case TEXEL_BUFFER_FORMAT_R8_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R8 1\n"; + break; + case TEXEL_BUFFER_FORMAT_R16_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R16 1\n"; + break; + case TEXEL_BUFFER_FORMAT_RGBA8_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_RGBA8 1\n"; + break; + case TEXEL_BUFFER_FORMAT_R32G32_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R32G32 1\n"; + break; + case NUM_TEXEL_BUFFER_FORMATS: + ASSERT(0); + break; + } + ss << decoding_shader_header; ss << info->shader_body; @@ -1121,7 +1179,10 @@ float4 DecodePixel(int val) ss << "\n"; - ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; + if (api_type == APIType::Metal) + ss << "SSBO_BINDING(0) readonly buffer Palette { uint16_t palette[]; };\n"; + else + ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; @@ -1143,9 +1204,12 @@ float4 DecodePixel(int val) ss << "void main() {\n"; ss << " float3 coords = v_tex0;\n"; ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; - ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; + if (api_type == APIType::Metal) + ss << " src = int(palette[uint(src)]);\n"; + else + ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; - ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n"; + ss << " src = ((src << 8) | (src >> 8)) & 0xFFFF;\n"; ss << " ocol0 = DecodePixel(src);\n"; ss << "}\n"; diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 0c8475123e..22708f3d66 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -82,7 +82,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, #ifdef __APPLE__ // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // if we want to use it. - if (api_type == APIType::Vulkan) + if (api_type == APIType::Vulkan || api_type == APIType::Metal) { if (use_dual_source) { diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index 6028b7b14a..2a3c15db0a 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -35,6 +35,9 @@ #ifdef HAS_VULKAN #include "VideoBackends/Vulkan/VideoBackend.h" #endif +#ifdef __APPLE__ +#include "VideoBackends/Metal/VideoBackend.h" +#endif #include "VideoCommon/AsyncRequests.h" #include "VideoCommon/BPStructs.h" @@ -227,6 +230,7 @@ const std::vector>& VideoBackendBase::GetAvail #ifdef __APPLE__ // Emplace the Vulkan backend at the beginning so it takes precedence over OpenGL. backends.emplace(backends.begin(), std::make_unique()); + backends.push_back(std::make_unique()); #else backends.push_back(std::make_unique()); #endif diff --git a/Source/Core/VideoCommon/VideoCommon.h b/Source/Core/VideoCommon/VideoCommon.h index 390224eda4..3b6540c822 100644 --- a/Source/Core/VideoCommon/VideoCommon.h +++ b/Source/Core/VideoCommon/VideoCommon.h @@ -39,6 +39,7 @@ enum class APIType OpenGL, D3D, Vulkan, + Metal, Nothing };