From 7d97c539f3915de9b02f76a017634e7b5edf00f2 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 6 Aug 2023 01:54:41 +1000 Subject: [PATCH] Metal Renderer --- src/core/CMakeLists.txt | 12 + src/core/gpu.h | 4 + src/core/gpu/gpu_device.cpp | 13 +- src/core/gpu/gpu_device.h | 3 +- src/core/gpu/metal_device.h | 373 +++++ src/core/gpu/metal_device.mm | 1955 +++++++++++++++++++++++++++ src/core/gpu/metal_stream_buffer.h | 65 + src/core/gpu/metal_stream_buffer.mm | 253 ++++ src/core/gpu_hw.cpp | 18 + src/core/gpu_hw_shadergen.cpp | 2 + src/core/settings.cpp | 6 + src/core/shadergen.cpp | 20 +- src/core/shadergen.h | 2 + src/core/system.cpp | 6 + src/core/types.h | 3 + 15 files changed, 2725 insertions(+), 10 deletions(-) create mode 100644 src/core/gpu/metal_device.h create mode 100644 src/core/gpu/metal_device.mm create mode 100644 src/core/gpu/metal_stream_buffer.h create mode 100644 src/core/gpu/metal_stream_buffer.mm diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 265b2e87a..5b296c16b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -171,6 +171,18 @@ if(WIN32) target_link_libraries(core PRIVATE winmm.lib) endif() +if(APPLE) + target_sources(core PRIVATE + gpu/metal_device.h + gpu/metal_device.mm + gpu/metal_stream_buffer.h + gpu/metal_stream_buffer.mm + ) + find_library(METAL_LIBRARY Metal) + find_library(QUARTZCORE_LIBRARY QuartzCore) + target_link_libraries(core PRIVATE ${METAL_LIBRARY} ${QUARTZCORE_LIBRARY}) +endif() + if(USE_X11) target_sources(common PRIVATE gl/x11_window.cpp diff --git a/src/core/gpu.h b/src/core/gpu.h index ccc251b72..bc5c0d64d 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -167,6 +167,10 @@ public: static std::unique_ptr CreateHardwareD3D12Renderer(); #endif +#ifdef __APPLE__ + static std::unique_ptr CreateHardwareMetalRenderer(); +#endif + #ifdef WITH_OPENGL // gpu_hw_opengl.cpp static std::unique_ptr CreateHardwareOpenGLRenderer(); diff --git a/src/core/gpu/gpu_device.cpp 
b/src/core/gpu/gpu_device.cpp index 0308562c4..60a93f1ab 100644 --- a/src/core/gpu/gpu_device.cpp +++ b/src/core/gpu/gpu_device.cpp @@ -34,6 +34,10 @@ Log_SetChannel(GPUDevice); #include "d3d12_gpu_device.h" #endif +#ifdef __APPLE__ +extern std::unique_ptr WrapNewMetalDevice(); +#endif + #ifdef WITH_OPENGL #include "opengl_device.h" #endif @@ -202,7 +206,7 @@ RenderAPI GPUDevice::GetPreferredAPI() #ifdef _WIN32___ // TODO remove me return RenderAPI::D3D11; #else - return RenderAPI::OpenGL; + return RenderAPI::Metal; #endif } @@ -1548,13 +1552,18 @@ std::unique_ptr Host::CreateDisplayForAPI(RenderAPI api) return std::make_unique(); #endif +#ifdef __APPLE__ + case RenderAPI::Metal: + return WrapNewMetalDevice(); +#endif + default: #if defined(_WIN32) && defined(_M_ARM64) return std::make_unique(); #elif defined(_WIN32) return std::make_unique(); #elif defined(__APPLE__) - return WrapNewMetalDevice(); + return WrapNewMetalDevice(); #elif defined(WITH_OPENGL) return std::make_unique(); #elif defined(WITH_VULKAN) diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index 56057cb62..14ec08870 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -26,7 +26,8 @@ enum class RenderAPI : u32 D3D12, Vulkan, OpenGL, - OpenGLES + OpenGLES, + Metal }; class GPUFramebuffer diff --git a/src/core/gpu/metal_device.h b/src/core/gpu/metal_device.h new file mode 100644 index 000000000..11f66156f --- /dev/null +++ b/src/core/gpu/metal_device.h @@ -0,0 +1,373 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "gpu_device.h" +#include "metal_stream_buffer.h" +#include "postprocessing_chain.h" + +#include "common/rectangle.h" +#include "common/timer.h" +#include "common/window_info.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef __OBJC__ +#error This file needs to be compiled with Objective C++. 
+#endif + +#if __has_feature(objc_arc) +#error ARC should not be enabled. +#endif + +class MetalDevice; +class MetalFramebuffer; +class MetalPipeline; +class MetalTexture; + +class MetalSampler final : public GPUSampler +{ + friend MetalDevice; + +public: + ~MetalSampler() override; + + ALWAYS_INLINE id GetSamplerState() const { return m_ss; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalSampler(id ss); + + id m_ss; +}; + +class MetalShader final : public GPUShader +{ + friend MetalDevice; + +public: + ~MetalShader() override; + + ALWAYS_INLINE id GetLibrary() const { return m_library; } + ALWAYS_INLINE id GetFunction() const { return m_function; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalShader(GPUShaderStage stage, id library, id function); + + id m_library; + id m_function; +}; + +class MetalPipeline final : public GPUPipeline +{ + friend MetalDevice; + +public: + ~MetalPipeline() override; + + ALWAYS_INLINE id GetPipelineState() const { return m_pipeline; } + ALWAYS_INLINE id GetDepthState() const { return m_depth; } + ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } + ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } + + void SetDebugName(const std::string_view& name) override; + +private: + MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive); + + id m_pipeline; + id m_depth; + MTLCullMode m_cull_mode; + MTLPrimitiveType m_primitive; +}; + +class MetalTexture final : public GPUTexture +{ + friend MetalDevice; + +public: + ~MetalTexture(); + + ALWAYS_INLINE id GetMTLTexture() const { return m_texture; } + + bool Create(id device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0); + void Destroy(); + + bool IsValid() const override; + + bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, 
u32 pitch, u32 layer = 0, u32 level = 0) override; + bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; + void Unmap() override; + + void SetDebugName(const std::string_view& name) override; + +private: + MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, + Format format); + + id m_texture; + + u16 m_map_x = 0; + u16 m_map_y = 0; + u16 m_map_width = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; +}; + +#if 0 +class MetalTextureBuffer final : public GPUTextureBuffer +{ +public: + MetalTextureBuffer(Format format, u32 size_in_elements); + ~MetalTextureBuffer() override; + + ALWAYS_INLINE IMetalBuffer* GetBuffer() const { return m_buffer.GetD3DBuffer(); } + ALWAYS_INLINE IMetalShaderResourceView* GetSRV() const { return m_srv.Get(); } + ALWAYS_INLINE IMetalShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); } + + bool CreateBuffer(IMetalDevice* device); + + // Inherited via GPUTextureBuffer + virtual void* Map(u32 required_elements) override; + virtual void Unmap(u32 used_elements) override; + +private: + MetalStreamBuffer m_buffer; + Microsoft::WRL::ComPtr m_srv; +}; +#endif + +class MetalFramebuffer final : public GPUFramebuffer +{ + friend MetalDevice; + +public: + ~MetalFramebuffer() override; + + MTLRenderPassDescriptor* GetDescriptor() const; + + void SetDebugName(const std::string_view& name) override; + +private: + MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, id ds_tex, + MTLRenderPassDescriptor* descriptor); + + id m_rt_tex; + id m_ds_tex; + MTLRenderPassDescriptor* m_descriptor; +}; + +class MetalDevice final : public GPUDevice +{ +public: + ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_host_display.get()); } + ALWAYS_INLINE static id GetMTLDevice() { return GetInstance().m_device; } + ALWAYS_INLINE static u64 GetCurrentFenceCounter() { 
return GetInstance().m_current_fence_counter; } + ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } + + MetalDevice(); + ~MetalDevice(); + + RenderAPI GetRenderAPI() const override; + + bool HasSurface() const override; + + bool CreateDevice(const WindowInfo& wi, bool vsync) override; + bool SetupDevice() override; + + bool MakeCurrent() override; + bool DoneCurrent() override; + + bool ChangeWindow(const WindowInfo& new_wi) override; + void ResizeWindow(s32 new_window_width, s32 new_window_height) override; + bool SupportsFullscreen() const override; + bool IsFullscreen() override; + bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; + AdapterAndModeList GetAdapterAndModeList() override; + void DestroySurface() override; + + std::string GetShaderCacheBaseName(const std::string_view& type, bool debug) const override; + + std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data = nullptr, u32 data_stride = 0, + bool dynamic = false) override; + std::unique_ptr CreateSampler(const GPUSampler::Config& config) override; + std::unique_ptr CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; + + bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) override; + bool SupportsTextureFormat(GPUTexture::Format format) const override; + void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, + u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; + + std::unique_ptr CreateFramebuffer(GPUTexture* rt = nullptr, 
u32 rt_layer = 0, u32 rt_level = 0, + GPUTexture* ds = nullptr, u32 ds_layer = 0, + u32 ds_level = 0) override; + + std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + std::vector* out_binary = nullptr) override; + std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + + bool GetHostRefreshRate(float* refresh_rate) override; + + bool SetGPUTimingEnabled(bool enabled) override; + float GetAndResetAccumulatedGPUTime() override; + + void SetVSync(bool enabled) override; + + bool BeginPresent(bool skip_present) override; + void EndPresent() override; + + void WaitForFenceCounter(u64 counter); + + ALWAYS_INLINE MetalStreamBuffer& 
GetTextureStreamBuffer() { return m_texture_upload_buffer; } + id GetTextureUploadEncoder(bool is_inline); + + void SubmitCommandBuffer(); + void SubmitCommandBufferAndRestartRenderPass(const char* reason); + + void UnbindFramebuffer(MetalFramebuffer* fb); + void UnbindPipeline(MetalPipeline* pl); + void UnbindTexture(MetalTexture* tex); + + static AdapterAndModeList StaticGetAdapterAndModeList(); + +private: + static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; + static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32/*16*/ * 1024 * 1024; // TODO reduce after separate allocations + static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; + + using DepthStateMap = std::unordered_map>; + + ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } + + void SetFeatures(); + + std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, const std::string_view& entry_point); + + id GetDepthState(const GPUPipeline::DepthState& ds); + + void CreateCommandBuffer(); + void CommandBufferCompleted(u64 fence_counter); + + ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } + ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } + void BeginRenderPass(); + void EndRenderPass(); + void EndInlineUploading(); + void EndAnyEncoding(); + + void PreDrawCheck(); + void SetInitialEncoderState(); + void SetUniformBufferInRenderEncoder(); + void SetViewportInRenderEncoder(); + void SetScissorInRenderEncoder(); + + //bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); + //void DestroyStagingBuffer(); + + bool CreateLayer(); + void DestroyLayer(); + + bool CreateBuffers(); + void DestroyBuffers(); + + bool CreateTimestampQueries(); + void DestroyTimestampQueries(); + 
void PopTimestampQuery(); + void KickTimestampQuery(); + + id m_device; + id m_queue; + + CAMetalLayer* m_layer = nil; + id m_layer_drawable = nil; + MTLRenderPassDescriptor* m_layer_pass_desc = nil; + + std::mutex m_fence_mutex; + u64 m_current_fence_counter = 0; + std::atomic m_completed_fence_counter{0}; + + DepthStateMap m_depth_states; + +// ComPtr m_readback_staging_texture; +// DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; +// u32 m_readback_staging_texture_width = 0; +// u32 m_readback_staging_texture_height = 0; + + MetalStreamBuffer m_vertex_buffer; + MetalStreamBuffer m_index_buffer; + MetalStreamBuffer m_uniform_buffer; + MetalStreamBuffer m_texture_upload_buffer; + + id m_upload_cmdbuf = nil; + id m_upload_encoder = nil; + id m_inline_upload_encoder = nil; + + id m_render_cmdbuf = nil; + id m_render_encoder = nil; + + MetalFramebuffer* m_current_framebuffer = nullptr; + + MetalPipeline* m_current_pipeline = nullptr; + id m_current_depth_state = nil; + MTLCullMode m_current_cull_mode = MTLCullModeNone; + u32 m_current_uniform_buffer_position = 0; + + std::array, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; + std::array, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; + Common::Rectangle m_current_viewport = {}; + Common::Rectangle m_current_scissor = {}; + + bool m_vsync_enabled = false; + +// std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; +// u8 m_read_timestamp_query = 0; +// u8 m_write_timestamp_query = 0; +// u8 m_waiting_timestamp_queries = 0; +// bool m_timestamp_query_started = false; +// float m_accumulated_gpu_time = 0.0f; +}; diff --git a/src/core/gpu/metal_device.mm b/src/core/gpu/metal_device.mm new file mode 100644 index 000000000..031fddd68 --- /dev/null +++ b/src/core/gpu/metal_device.mm @@ -0,0 +1,1955 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "metal_device.h" +#include "spirv_compiler.h" +#include 
"../host_settings.h" +#include "../shader_cache_version.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/file_system.h" +#include "common/log.h" +#include "common/path.h" +#include "common/string_util.h" + +// TODO FIXME... +#define FMT_EXCEPTIONS 0 +#include "fmt/format.h" + +#include +#include +#include + +Log_SetChannel(MetalDevice); + +static constexpr MTLPixelFormat LAYER_MTL_PIXEL_FORMAT = MTLPixelFormatRGBA8Unorm; +static constexpr GPUTexture::Format LAYER_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; + +// Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems +// to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. +static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; + +// The pitch alignment must be less or equal to the upload alignment. +// We need 32 here for AVX2, so 64 is also fine. +static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; + +static constexpr std::array(GPUTexture::Format::Count)> s_pixel_format_mapping = { + MTLPixelFormatInvalid, // Unknown + MTLPixelFormatRGBA8Unorm, // RGBA8 + MTLPixelFormatBGRA8Unorm, // BGRA8 + MTLPixelFormatB5G6R5Unorm, // RGB565 + MTLPixelFormatA1BGR5Unorm,// RGBA5551 + MTLPixelFormatR8Unorm, // R8 + MTLPixelFormatDepth16Unorm, // D16 +}; + +static constexpr std::array s_clear_color = {}; + +static unsigned s_next_bad_shader_id = 1; + +static NSString* StringViewToNSString(const std::string_view& str) +{ + if (str.empty()) + return nil; + + return [[[NSString alloc] autorelease] initWithBytes:str.data() length:static_cast(str.length()) encoding:NSUTF8StringEncoding]; +} + +static void LogNSError(NSError* error, const char* desc, ...) 
+{ + std::va_list ap; + va_start(ap, desc); + Log::Writev("MetalDevice", "", LOGLEVEL_ERROR, desc, ap); + va_end(ap); + + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Code: %u", static_cast(error.code)); + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Description: %s", [error.description UTF8String]); +} + +template +static void RunOnMainThread(F&& f) +{ + if ([NSThread isMainThread]) + f(); + else + dispatch_sync(dispatch_get_main_queue(), f); +} + +MetalDevice::MetalDevice() +: m_current_viewport(0, 0, 1, 1) +, m_current_scissor(0, 0, 1, 1) +{ +} + +MetalDevice::~MetalDevice() +{ + // TODO: Make virtual Destroy() method instead due to order of shit.. + //DestroyStagingBuffer(); + DestroyResources(); + DestroyBuffers(); + DestroySurface(); + + if (m_device != nil) + [m_device release]; +} + +RenderAPI MetalDevice::GetRenderAPI() const +{ + return RenderAPI::Metal; +} + +bool MetalDevice::HasSurface() const +{ + // TODO FIXME + //return static_cast(m_swap_chain); + return false; +} + +bool MetalDevice::GetHostRefreshRate(float* refresh_rate) +{ +#if 0 + if (m_swap_chain && IsFullscreen()) + { + DXGI_SWAP_CHAIN_DESC desc; + if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && + desc.BufferDesc.RefreshRate.Denominator > 0) + { + Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, + desc.BufferDesc.RefreshRate.Denominator); + *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / + static_cast(desc.BufferDesc.RefreshRate.Denominator); + return true; + } + } + + return GPUDevice::GetHostRefreshRate(refresh_rate); +#else + Panic("Fixme"); + return false; +#endif +} + +void MetalDevice::SetVSync(bool enabled) +{ + m_vsync_enabled = enabled; +} + +bool MetalDevice::CreateDevice(const WindowInfo& wi, bool vsync) +{ @autoreleasepool { + // TODO: adapter selection + id device = [MTLCreateSystemDefaultDevice() autorelease]; + if (device == nil) + { + Log_ErrorPrint("Failed to 
create default Metal device."); + return false; + } + + id queue = [[device newCommandQueue] autorelease]; + if (queue == nil) + { + Log_ErrorPrint("Failed to create command queue."); + return false; + } + + m_device = [device retain]; + m_queue = [queue retain]; + Log_InfoPrintf("Metal Device: %s", [[m_device name] UTF8String]); + + SetFeatures(); + + m_window_info = wi; + m_vsync_enabled = vsync; + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) + { + m_window_info = {}; + return false; + } + + CreateCommandBuffer(); + return true; +} } + +bool MetalDevice::SetupDevice() +{ + if (!GPUDevice::SetupDevice()) + return false; + + if (!CreateBuffers() || !CreateResources()) + return false; + + return true; +} + +void MetalDevice::SetFeatures() +{ + // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d + if ([m_device supportsFamily:MTLGPUFamilyMacCatalyst1] || + [m_device supportsFamily:MTLGPUFamilyMac1] || + [m_device supportsFamily:MTLGPUFamilyApple3]) + { + m_max_texture_size = 16384; + } + else + { + m_max_texture_size = 8192; + } + + m_max_multisamples = 0; + for (u32 multisamples = 1; multisamples < 16; multisamples++) + { + if (![m_device supportsTextureSampleCount:multisamples]) + break; + m_max_multisamples = multisamples; + } + + m_features.dual_source_blend = true; + m_features.per_sample_shading = true; + m_features.mipmapped_render_targets = true; + m_features.noperspective_interpolation = true; + m_features.supports_texture_buffers = true; + m_features.texture_buffers_emulated_with_ssbo = true; +} + +bool MetalDevice::MakeCurrent() +{ + return true; +} + +bool MetalDevice::DoneCurrent() +{ + return true; +} + +bool MetalDevice::CreateLayer() +{ @autoreleasepool { + RunOnMainThread([this]() { @autoreleasepool { + Log_InfoPrintf("Creating a %ux%u Metal layer.", m_window_info.surface_width,m_window_info.surface_height); + const auto size = CGSizeMake(static_cast(m_window_info.surface_width), 
static_cast(m_window_info.surface_height)); + m_layer = [CAMetalLayer layer]; + [m_layer setDevice:m_device]; + [m_layer setDrawableSize:size]; + [m_layer setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + NSView* view = GetWindowView(); + [view setWantsLayer:TRUE]; + [view setLayer:m_layer]; + }}); + + DebugAssert(m_layer_pass_desc == nil); + m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; + m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; + m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; + m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore; + m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0); + return true; +}} + +void MetalDevice::DestroyLayer() +{ + if (m_layer == nil) + return; + + [m_layer_pass_desc release]; + m_layer_pass_desc = nil; + + RunOnMainThread([this]() { + NSView* view = GetWindowView(); + [view setLayer:nil]; + [view setWantsLayer:FALSE]; + [m_layer release]; + m_layer = nullptr; + }); +} + +bool MetalDevice::ChangeWindow(const WindowInfo& new_wi) +{ +#if 0 + DestroySurface(); + + m_window_info = new_wi; + return CreateSwapChain(nullptr); +#else + return false; +#endif +} + +void MetalDevice::DestroySurface() +{ +#if 0 + m_window_info.SetSurfaceless(); + if (IsFullscreen()) + SetFullscreen(false, 0, 0, 0.0f); + + m_swap_chain_rtv.Reset(); + m_swap_chain.Reset(); +#else + Panic("Fixme"); +#endif +} + +std::string MetalDevice::GetShaderCacheBaseName(const std::string_view& type, bool debug) const +{ + return fmt::format("metal_{}{}", type, debug ? "_debug" : ""); +} + +void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height) +{ +#if 0 + if (!m_swap_chain) + return; + + m_swap_chain_rtv.Reset(); + + HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, + m_using_allow_tearing ? 
DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); + if (FAILED(hr)) + Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); + + if (!CreateSwapChainRTV()) + Panic("Failed to recreate swap chain RTV after resize"); +#else + Panic("Fixme"); + // adjust pass desc +#endif +} + +bool MetalDevice::SupportsFullscreen() const +{ + return false; +} + +bool MetalDevice::IsFullscreen() +{ + return false; +} + +bool MetalDevice::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) +{ + return false; +} + +bool MetalDevice::CreateBuffers() +{ + if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || + !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || + !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) || + !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) + { + Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); + return false; + } + + return true; +} + +void MetalDevice::DestroyBuffers() +{ + m_texture_upload_buffer.Destroy(); + m_uniform_buffer.Destroy(); + m_vertex_buffer.Destroy(); + m_index_buffer.Destroy(); + + for (auto& it : m_depth_states) + { + if (it.second != nil) + [it.second release]; + } + m_depth_states.clear(); +} + +GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() +{ + return {}; +} + +GPUDevice::AdapterAndModeList MetalDevice::GetAdapterAndModeList() +{ + return StaticGetAdapterAndModeList(); +} + +#if 0 +bool MetalDevice::CreateTimestampQueries() +{ + for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) + { + for (u32 j = 0; j < 3; j++) + { + const CMetal_QUERY_DESC qdesc((j == 0) ? 
Metal_QUERY_TIMESTAMP_DISJOINT : Metal_QUERY_TIMESTAMP); + const HRESULT hr = m_device->CreateQuery(&qdesc, m_timestamp_queries[i][j].ReleaseAndGetAddressOf()); + if (FAILED(hr)) + { + m_timestamp_queries = {}; + return false; + } + } + } + + KickTimestampQuery(); + return true; +} + +void MetalDevice::DestroyTimestampQueries() +{ + if (!m_timestamp_queries[0][0]) + return; + + if (m_timestamp_query_started) + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + + m_timestamp_queries = {}; + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; +} + +void MetalDevice::PopTimestampQuery() +{ + while (m_waiting_timestamp_queries > 0) + { + Metal_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + const HRESULT disjoint_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][0].Get(), &disjoint, + sizeof(disjoint), Metal_ASYNC_GETDATA_DONOTFLUSH); + if (disjoint_hr != S_OK) + break; + + if (disjoint.Disjoint) + { + Log_VerbosePrintf("GPU timing disjoint, resetting."); + m_read_timestamp_query = 0; + m_write_timestamp_query = 0; + m_waiting_timestamp_queries = 0; + m_timestamp_query_started = 0; + } + else + { + u64 start = 0, end = 0; + const HRESULT start_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][1].Get(), &start, + sizeof(start), Metal_ASYNC_GETDATA_DONOTFLUSH); + const HRESULT end_hr = m_context->GetData(m_timestamp_queries[m_read_timestamp_query][2].Get(), &end, sizeof(end), + Metal_ASYNC_GETDATA_DONOTFLUSH); + if (start_hr == S_OK && end_hr == S_OK) + { + const float delta = + static_cast(static_cast(end - start) / (static_cast(disjoint.Frequency) / 1000.0)); + m_accumulated_gpu_time += delta; + m_read_timestamp_query = (m_read_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_waiting_timestamp_queries--; + } + } + } + + if (m_timestamp_query_started) + { + m_context->End(m_timestamp_queries[m_write_timestamp_query][2].Get()); + 
m_context->End(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_write_timestamp_query = (m_write_timestamp_query + 1) % NUM_TIMESTAMP_QUERIES; + m_timestamp_query_started = false; + m_waiting_timestamp_queries++; + } +} + +void MetalDevice::KickTimestampQuery() +{ + if (m_timestamp_query_started || !m_timestamp_queries[0][0] || m_waiting_timestamp_queries == NUM_TIMESTAMP_QUERIES) + return; + + m_context->Begin(m_timestamp_queries[m_write_timestamp_query][0].Get()); + m_context->End(m_timestamp_queries[m_write_timestamp_query][1].Get()); + m_timestamp_query_started = true; +} +#endif + +bool MetalDevice::SetGPUTimingEnabled(bool enabled) +{ +#if 0 + if (m_gpu_timing_enabled == enabled) + return true; + + m_gpu_timing_enabled = enabled; + if (m_gpu_timing_enabled) + { + if (!CreateTimestampQueries()) + return false; + + KickTimestampQuery(); + return true; + } + else + { + DestroyTimestampQueries(); + return true; + } +#else + return false; +#endif +} + +float MetalDevice::GetAndResetAccumulatedGPUTime() +{ +#if 0 + const float value = m_accumulated_gpu_time; + m_accumulated_gpu_time = 0.0f; + return value; +#else + return 0.0f; +#endif +} + +MetalShader::MetalShader(GPUShaderStage stage, id library, id function) + : GPUShader(stage), m_library(library), m_function(function) +{ +} + +MetalShader::~MetalShader() +{ + [m_function release]; + [m_library release]; +} + +void MetalShader::SetDebugName(const std::string_view& name) +{ @autoreleasepool { + [m_function setLabel:StringViewToNSString(name)]; +} } + +// TODO: Clean this up, somehow.. 
+namespace EmuFolders +{ +extern std::string DataRoot; +} +static void DumpShader(u32 n, const std::string_view& suffix, const std::string_view& data) +{ + if (data.empty()) + return; + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("shader{}_{}.txt", suffix, n)).c_str(), "wb"); + if (!fp) + return; + + std::fwrite(data.data(), data.length(), 1, fp.get()); +} + +std::unique_ptr MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, const std::string_view& entry_point) +{ @autoreleasepool { + NSString* const ns_source = StringViewToNSString(source); + NSError* error = nullptr; + id library = [m_device newLibraryWithSource:ns_source options:nil error:&error]; + if (!library) + { + LogNSError(error, "Failed to compile %s shader", GPUShader::GetStageName(stage)); + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile %s failed: %u\n", GPUShader::GetStageName(stage), static_cast(error.code)); + + const char* utf_error = [error.description UTF8String]; + std::fwrite(utf_error, std::strlen(utf_error), 1, fp.get()); + } + + return {}; + } + + id function = [library newFunctionWithName:StringViewToNSString(entry_point)]; + if (!function) + { + Log_ErrorPrintf("Failed to get main function in compiled library"); + return {}; + } + + return std::unique_ptr(new MetalShader(stage, [library retain], [function retain])); +} } + +std::unique_ptr MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) +{ + const std::string_view str_data(reinterpret_cast(data.data()), data.size()); + return CreateShaderFromMSL(stage, str_data, "main0"); +} + +std::unique_ptr MetalDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + std::vector* out_binary /* = 
nullptr */) +{ +#ifdef _DEBUG + static constexpr u32 options = SPIRVCompiler::DebugInfo | SPIRVCompiler::VulkanRules; +#else + static constexpr u32 options = SPIRVCompiler::VulkanRules; +#endif + static constexpr bool dump_shaders = true; + + std::optional spirv = SPIRVCompiler::CompileShader(stage, source, options); + if (!spirv.has_value()) + { + Log_ErrorPrintf("Failed to compile shader to SPIR-V."); + return {}; + } + + std::optional msl = SPIRVCompiler::CompileSPIRVToMSL(spirv.value()); + if (!msl.has_value()) + { + Log_ErrorPrintf("Failed to compile SPIR-V to MSL."); + return {}; + } + if constexpr (dump_shaders) + { + DumpShader(s_next_bad_shader_id, "_input", source); + DumpShader(s_next_bad_shader_id, "_msl", msl.value()); + s_next_bad_shader_id++; + } + + if (out_binary) + { + out_binary->resize(msl->size()); + std::memcpy(out_binary->data(), msl->data(), msl->size()); + } + + return CreateShaderFromMSL(stage, msl.value(), "main0"); +} + +MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive) + : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive) +{ +} + +MetalPipeline::~MetalPipeline() +{ + [m_pipeline release]; +} + +void MetalPipeline::SetDebugName(const std::string_view& name) +{ + // readonly property :/ +} + +id MetalDevice::GetDepthState(const GPUPipeline::DepthState& ds) +{ + const auto it = m_depth_states.find(ds.key); + if (it != m_depth_states.end()) + return it->second; + + @autoreleasepool { + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = + {{ + MTLCompareFunctionNever, // Never + MTLCompareFunctionAlways, // Always + MTLCompareFunctionLess, // Less + MTLCompareFunctionLessEqual, // LessEqual + MTLCompareFunctionGreater, // Greater + MTLCompareFunctionGreaterEqual, // GreaterEqual + MTLCompareFunctionEqual, // Equal + }}; + + MTLDepthStencilDescriptor* desc = [[[MTLDepthStencilDescriptor alloc] init] autorelease]; + 
desc.depthCompareFunction = func_mapping[static_cast(ds.depth_test.GetValue())]; + desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE; + + id state = [m_device newDepthStencilStateWithDescriptor:desc]; + m_depth_states.emplace(ds.key, state); + if (state == nil) + Log_ErrorPrintf("Failed to create depth-stencil state."); + + return state; + } +} + +std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) +{ @autoreleasepool { + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitive_classes = + {{ + MTLPrimitiveTopologyClassPoint, // Points + MTLPrimitiveTopologyClassLine, // Lines + MTLPrimitiveTopologyClassTriangle, // Triangles + MTLPrimitiveTopologyClassTriangle, // TriangleStrips + }}; + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = + {{ + MTLPrimitiveTypePoint, // Points + MTLPrimitiveTypeLine, // Lines + MTLPrimitiveTypeTriangle, // Triangles + MTLPrimitiveTypeTriangleStrip, // TriangleStrips + }}; + + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const MTLVertexFormat + format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float + {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 + {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 + {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized}, // UNorm8 + {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 + {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 + {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, MTLVertexFormatUShort4Normalized}, // UNorm16 
+ {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 + {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 + }; + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + MTLCullModeNone, // None + MTLCullModeFront, // Front + MTLCullModeBack, // Back + }}; + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + MTLBlendFactorZero, // Zero + MTLBlendFactorOne, // One + MTLBlendFactorSourceColor, // SrcColor + MTLBlendFactorOneMinusSourceColor, // InvSrcColor + MTLBlendFactorDestinationColor, // DstColor + MTLBlendFactorOneMinusDestinationColor, // InvDstColor + MTLBlendFactorSourceAlpha, // SrcAlpha + MTLBlendFactorOneMinusSourceAlpha, // InvSrcAlpha + MTLBlendFactorSource1Alpha, // SrcAlpha1 + MTLBlendFactorOneMinusSource1Alpha, // InvSrcAlpha1 + MTLBlendFactorDestinationAlpha, // DstAlpha + MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha + MTLBlendFactorBlendColor, // ConstantAlpha + MTLBlendFactorOneMinusBlendColor, // InvConstantAlpha + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + MTLBlendOperationAdd, // Add + MTLBlendOperationSubtract, // Subtract + MTLBlendOperationReverseSubtract, // ReverseSubtract + MTLBlendOperationMin, // Min + MTLBlendOperationMax, // Max + }}; + + MTLRenderPipelineDescriptor* desc = [[[MTLRenderPipelineDescriptor alloc] init] autorelease]; + desc.vertexFunction = static_cast(config.vertex_shader)->GetFunction(); + desc.fragmentFunction = static_cast(config.fragment_shader)->GetFunction(); + + desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast(config.color_format)]; + desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast(config.depth_format)]; + + // Input assembly. 
+ MTLVertexDescriptor* vdesc = nil; + if (!config.input_layout.vertex_attributes.empty()) + { + vdesc = [MTLVertexDescriptor vertexDescriptor]; + for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; + DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); + + MTLVertexAttributeDescriptor* vd = vdesc.attributes[i]; + vd.format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; + vd.offset = static_cast(va.offset.GetValue()); + vd.bufferIndex = 1; + } + + vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex; + vdesc.layouts[1].stepRate = 1; + vdesc.layouts[1].stride = config.input_layout.vertex_stride; + + desc.vertexDescriptor = vdesc; + } + + // Rasterization state. + const MTLCullMode cull_mode = cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())]; + desc.rasterizationEnabled = TRUE; + desc.inputPrimitiveTopology = primitive_classes[static_cast(config.primitive)]; + + // Depth state + id depth = GetDepthState(config.depth); + if (depth == nil) + return {}; + + // Blending state + MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0]; + ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | + (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) | + (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) | + (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone); + + // General + const MTLPrimitiveType primitive = primitives[static_cast(config.primitive)]; + desc.rasterSampleCount = config.per_sample_shading ? 
config.samples : 1; + + // Metal-specific stuff + desc.vertexBuffers[1].mutability = MTLMutabilityImmutable; + desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable; + + ca.blendingEnabled = config.blend.enable; + if (config.blend.enable) + { + ca.sourceRGBBlendFactor = blend_mapping[static_cast(config.blend.src_blend.GetValue())]; + ca.destinationRGBBlendFactor = blend_mapping[static_cast(config.blend.dst_blend.GetValue())]; + ca.rgbBlendOperation = op_mapping[static_cast(config.blend.blend_op.GetValue())]; + ca.sourceAlphaBlendFactor = blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())]; + ca.destinationAlphaBlendFactor = blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())]; + ca.alphaBlendOperation = op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())]; + } + + NSError* error = nullptr; + id pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error]; + if (pipeline == nil) + { + LogNSError(error, "Failed to create render pipeline state"); + return {}; + } + + return std::unique_ptr(new MetalPipeline(pipeline, depth, cull_mode, primitive)); +}} + +MetalTexture::MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, + Format format) + : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) +{ +} + +MetalTexture::~MetalTexture() +{ + Destroy(); +} + +#if 0 +void MetalTexture::CommitClear(IMetalDeviceContext* context) +{ + if (m_state == GPUTexture::State::Dirty) + return; + + // TODO: 11.1 + if (IsDepthStencil()) + { + if (m_state == GPUTexture::State::Invalidated) + ; // context->DiscardView(GetD3DDSV()); + else + context->ClearDepthStencilView(GetD3DDSV(), Metal_CLEAR_DEPTH, GetClearDepth(), 0); + } + else if (IsRenderTarget()) + { + if (m_state == GPUTexture::State::Invalidated) + ; // context->DiscardView(GetD3DRTV()); + else + context->ClearRenderTargetView(GetD3DRTV(), GetUNormClearColor().data()); + } + + m_state = 
GPUTexture::State::Dirty;
+}
+#endif
+
+// Returns true while this wrapper holds a Metal texture object.
+bool MetalTexture::IsValid() const
+{
+  return (m_texture != nil);
+}
+
+// Copies a width x height block of pixels from `data` (row stride `pitch`) into the texture stream buffer,
+// then records a buffer-to-texture copy targeting (x, y) of the given layer/level.
+// Returns false only if stream buffer space cannot be reserved (currently preceded by a Panic).
+bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
+                          u32 level /*= 0*/)
+{
+  // Rows in the staging buffer are padded to the upload pitch alignment.
+  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 req_size = height * aligned_pitch;
+
+  MetalDevice& dev = MetalDevice::GetInstance();
+  MetalStreamBuffer& sb = dev.GetTextureStreamBuffer();
+  if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
+  {
+    // TODO fixme
+    Panic("Outta space");
+    return false;
+  }
+
+  // Capture the offset before committing; it is the source offset of the copy below.
+  const u32 offset = sb.GetCurrentOffset();
+  StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
+  sb.CommitMemory(req_size);
+
+  // TODO: track this
+  const bool is_inline = true;
+  id encoder = dev.GetTextureUploadEncoder(is_inline);
+  [encoder copyFromBuffer:sb.GetBuffer() sourceOffset:offset sourceBytesPerRow:aligned_pitch sourceBytesPerImage:0
+   sourceSize:MTLSizeMake(width, height, 1) toTexture:m_texture destinationSlice: layer destinationLevel: level destinationOrigin: MTLOriginMake(x, y, 0)];
+  m_state = GPUTexture::State::Dirty;
+  return true;
+}
+
+// Reserves staging space for a width x height region and hands the host pointer and row stride back to the
+// caller. The reservation is committed, and the copy into the texture recorded, by Unmap().
+// Returns false when the region, layer or level is out of range for this texture.
+bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/,
+                       u32 level /*= 0*/)
+{
+  // Layer and level are zero-based indices, so a value equal to the count is already out of range.
+  if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer >= m_layers ||
+      level >= m_levels)
+  {
+    return false;
+  }
+
+  // TODO: Commit Clear
+
+  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 req_size = height * aligned_pitch;
+
+  MetalStreamBuffer& sb = MetalDevice::GetInstance().GetTextureStreamBuffer();
+  if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
+  {
+    // TODO: handle
+    Panic("Outta space");
+    return false;
+  }
+
+  *map = sb.GetCurrentHostPointer();
+  *map_stride = aligned_pitch;
+  
m_map_x = x; + m_map_y = y; + m_map_width = width; + m_map_height = height; + m_map_layer = layer; + m_map_level = level; + m_state = GPUTexture::State::Dirty; + return true; +} + +void MetalTexture::Unmap() +{ + const bool discard = (m_map_width == m_width && m_map_height == m_height); + const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = m_map_height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + const u32 offset = sb.GetCurrentOffset(); + sb.CommitMemory(req_size); + + // TODO: track this + const bool is_inline = true; + id encoder = dev.GetTextureUploadEncoder(is_inline); + [encoder copyFromBuffer:sb.GetBuffer() sourceOffset:offset sourceBytesPerRow:aligned_pitch sourceBytesPerImage:0 + sourceSize:MTLSizeMake(m_map_width, m_map_height, 1) toTexture:m_texture destinationSlice: m_map_layer + destinationLevel: m_map_level destinationOrigin: MTLOriginMake(m_map_x, m_map_y, 0)]; + + m_map_x = 0; + m_map_y = 0; + m_map_width = 0; + m_map_height = 0; + m_map_layer = 0; + m_map_level = 0; +} + +void MetalTexture::SetDebugName(const std::string_view& name) +{ @autoreleasepool { + [m_texture setLabel:StringViewToNSString(name)]; +} } + +#if 0 +bool MetalTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, + Format format, const void* initial_data, u32 initial_data_stride) +{ + MetalDe + if (width > Metal_REQ_TEXTURE2D_U_OR_V_DIMENSION || height > Metal_REQ_TEXTURE2D_U_OR_V_DIMENSION || + layers > Metal_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION || (layers > 1 && samples > 1)) + { + Log_ErrorPrintf("Texture bounds (%ux%ux%u, %u mips, %u samples) are too large", width, height, layers, levels, + samples); + return false; + } + + u32 bind_flags = 0; + switch (type) + { + case Type::RenderTarget: + bind_flags = Metal_BIND_RENDER_TARGET | Metal_BIND_SHADER_RESOURCE; + break; + case 
Type::DepthStencil: + bind_flags = Metal_BIND_DEPTH_STENCIL; // | Metal_BIND_SHADER_RESOURCE; + break; + case Type::Texture: + bind_flags = Metal_BIND_SHADER_RESOURCE; + break; + case Type::RWTexture: + bind_flags = Metal_BIND_UNORDERED_ACCESS | Metal_BIND_SHADER_RESOURCE; + break; + default: + break; + } + + CMetal_TEXTURE2D_DESC desc(GetDXGIFormat(format), width, height, layers, levels, bind_flags, + dynamic ? Metal_USAGE_DYNAMIC : Metal_USAGE_DEFAULT, dynamic ? Metal_CPU_ACCESS_WRITE : 0, + samples, 0, 0); + + Metal_SUBRESOURCE_DATA srd; + srd.pSysMem = initial_data; + srd.SysMemPitch = initial_data_stride; + srd.SysMemSlicePitch = initial_data_stride * height; + + ComPtr texture; + const HRESULT tex_hr = device->CreateTexture2D(&desc, initial_data ? &srd : nullptr, texture.GetAddressOf()); + if (FAILED(tex_hr)) + { + Log_ErrorPrintf( + "Create texture failed: 0x%08X (%ux%u levels:%u samples:%u format:%u bind_flags:%X initial_data:%p)", tex_hr, + width, height, levels, samples, static_cast(format), bind_flags, initial_data); + return false; + } + + ComPtr srv; + if (bind_flags & Metal_BIND_SHADER_RESOURCE) + { + const Metal_SRV_DIMENSION srv_dimension = + (desc.SampleDesc.Count > 1) ? + Metal_SRV_DIMENSION_TEXTURE2DMS : + (desc.ArraySize > 1 ? Metal_SRV_DIMENSION_TEXTURE2DARRAY : Metal_SRV_DIMENSION_TEXTURE2D); + const CMetal_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); + const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create SRV for texture failed: 0x%08X", hr); + return false; + } + } + + ComPtr rtv_dsv; + if (bind_flags & Metal_BIND_RENDER_TARGET) + { + const Metal_RTV_DIMENSION rtv_dimension = + (desc.SampleDesc.Count > 1) ? 
Metal_RTV_DIMENSION_TEXTURE2DMS : Metal_RTV_DIMENSION_TEXTURE2D; + const CMetal_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); + ComPtr rtv; + const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create RTV for texture failed: 0x%08X", hr); + return false; + } + + rtv_dsv = std::move(rtv); + } + else if (bind_flags & Metal_BIND_DEPTH_STENCIL) + { + const Metal_DSV_DIMENSION dsv_dimension = + (desc.SampleDesc.Count > 1) ? Metal_DSV_DIMENSION_TEXTURE2DMS : Metal_DSV_DIMENSION_TEXTURE2D; + const CMetal_DEPTH_STENCIL_VIEW_DESC dsv_desc(dsv_dimension, desc.Format, 0, 0, desc.ArraySize); + ComPtr dsv; + const HRESULT hr = device->CreateDepthStencilView(texture.Get(), &dsv_desc, dsv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("Create DSV for texture failed: 0x%08X", hr); + return false; + } + + rtv_dsv = std::move(dsv); + } + + m_texture = std::move(texture); + m_srv = std::move(srv); + m_rtv_dsv = std::move(rtv_dsv); + m_width = static_cast(width); + m_height = static_cast(height); + m_layers = static_cast(layers); + m_levels = static_cast(levels); + m_samples = static_cast(samples); + m_format = format; + m_dynamic = dynamic; + return true; +} +#endif + +void MetalTexture::Destroy() +{ + if (m_texture != nil) + { + [m_texture release]; + m_texture = nil; + } + ClearBaseProperties(); +} + +std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, + GPUTexture::Type type, GPUTexture::Format format, + const void* data, u32 data_stride, bool dynamic /* = false */) +{ @autoreleasepool { + if (width > m_max_texture_size || height > m_max_texture_size || samples > m_max_multisamples) + return {}; + + const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast(format)]; + if (pixel_format == MTLPixelFormatInvalid) + return {}; + + MTLTextureDescriptor* desc = [[[MTLTextureDescriptor alloc] 
init] autorelease];
+  desc.width = width;
+  desc.height = height;
+  // The descriptor keeps its default 2D texture type, for which depth must be 1; the mip chain length is
+  // expressed through mipmapLevelCount only.
+  desc.depth = 1;
+  desc.pixelFormat = pixel_format;
+  desc.mipmapLevelCount = levels;
+
+  switch (type)
+  {
+    case GPUTexture::Type::Texture:
+      desc.usage = MTLTextureUsageShaderRead;
+      break;
+
+    case GPUTexture::Type::RenderTarget:
+    case GPUTexture::Type::DepthStencil:
+      desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget;
+      break;
+
+    case GPUTexture::Type::RWTexture:
+      desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
+      break;
+
+    default:
+      UnreachableCode();
+      break;
+  }
+
+  id tex = [m_device newTextureWithDescriptor:desc];
+  if (tex == nil)
+  {
+    Log_ErrorPrintf("Failed to create %ux%u texture.", width, height);
+    return {};
+  }
+
+  // This one can *definitely* go on the upload buffer.
+  // newTextureWithDescriptor: already returns an owned reference and MetalTexture::Destroy() releases exactly
+  // once, so ownership is handed over as-is; an additional retain here would never be balanced.
+  std::unique_ptr gtex(new MetalTexture(tex, width, height, layers, levels, samples, type, format));
+  if (data)
+  {
+    // TODO: handle multi-level uploads...
+    gtex->Update(0, 0, width, height, data, data_stride, 0, 0);
+  }
+
+  return gtex;
+} }
+
+// Stores the attachment textures and render pass descriptor handed in by the caller; the destructor releases
+// them, so the caller must pass references it owns.
+MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height,
+                                   id rt_tex, id ds_tex,
+                                   MTLRenderPassDescriptor* descriptor)
+  : GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor)
+{
+}
+
+MetalFramebuffer::~MetalFramebuffer()
+{
+  // TODO: safe deleting? 
+  if (m_rt_tex != nil)
+    [m_rt_tex release];
+  if (m_ds_tex != nil)
+    [m_ds_tex release];
+  [m_descriptor release];
+}
+
+void MetalFramebuffer::SetDebugName(const std::string_view& name)
+{
+}
+
+// Returns the cached render pass descriptor with the load actions refreshed from the current state of the
+// attached textures. Cleared/Invalidated textures are switched to Dirty as a side effect, because the pass
+// that uses this descriptor performs the clear/discard.
+MTLRenderPassDescriptor* MetalFramebuffer::GetDescriptor() const
+{
+  // The Dirty case must be handled as well: the descriptor is reused between passes, so a load action of
+  // Clear/DontCare left over from an earlier pass has to be reset to Load, otherwise every subsequent pass
+  // would clear (or discard) the target again.
+  if (m_rt)
+  {
+    switch (m_rt->GetState())
+    {
+      case GPUTexture::State::Cleared:
+      {
+        const auto clear_color = m_rt->GetUNormClearColor();
+        m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear;
+        m_descriptor.colorAttachments[0].clearColor = MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]);
+        m_rt->SetState(GPUTexture::State::Dirty);
+      }
+      break;
+
+      case GPUTexture::State::Invalidated:
+      {
+        m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare;
+        m_rt->SetState(GPUTexture::State::Dirty);
+      }
+      break;
+
+      case GPUTexture::State::Dirty:
+      {
+        m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad;
+      }
+      break;
+
+      default:
+        UnreachableCode();
+        break;
+    }
+  }
+
+  if (m_ds)
+  {
+    switch (m_ds->GetState())
+    {
+      case GPUTexture::State::Cleared:
+      {
+        m_descriptor.depthAttachment.loadAction = MTLLoadActionClear;
+        m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth();
+        m_ds->SetState(GPUTexture::State::Dirty);
+      }
+      break;
+
+      case GPUTexture::State::Invalidated:
+      {
+        m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare;
+        m_ds->SetState(GPUTexture::State::Dirty);
+      }
+      break;
+
+      case GPUTexture::State::Dirty:
+      {
+        m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad;
+      }
+      break;
+
+      default:
+        UnreachableCode();
+        break;
+    }
+  }
+
+  return m_descriptor;
+}
+
+std::unique_ptr MetalDevice::CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level,
+                                               GPUTexture* ds, u32 ds_layer, u32 ds_level)
+{ @autoreleasepool {
+  MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain];
+  id rt_tex = rt ? 
[static_cast(rt)->GetMTLTexture() retain] : nil;
+  id ds_tex = ds ? [static_cast(ds)->GetMTLTexture() retain] : nil;
+
+  // Colour attachment: only configured when a render target was supplied.
+  if (rt)
+  {
+    desc.colorAttachments[0].texture = rt_tex;
+    desc.colorAttachments[0].slice = rt_layer;
+    desc.colorAttachments[0].level = rt_level;
+    desc.colorAttachments[0].loadAction = MTLLoadActionLoad;
+    desc.colorAttachments[0].storeAction = MTLStoreActionStore;
+  }
+
+  // Depth attachment: keyed on the depth texture, not the render target, so depth-only framebuffers get
+  // their attachment and colour-only framebuffers leave it untouched.
+  if (ds)
+  {
+    desc.depthAttachment.texture = ds_tex;
+    desc.depthAttachment.slice = ds_layer;
+    desc.depthAttachment.level = ds_level;
+    desc.depthAttachment.loadAction = MTLLoadActionLoad;
+    desc.depthAttachment.storeAction = MTLStoreActionStore;
+  }
+
+  // Dimensions come from the render target when present, otherwise from the depth texture.
+  const u32 width = rt ? rt->GetMipWidth(rt_level) : ds->GetMipWidth(ds_level);
+  const u32 height = rt ? rt->GetMipHeight(rt_level) : ds->GetMipHeight(ds_level);
+  desc.renderTargetWidth = width;
+  desc.renderTargetHeight = height;
+
+  return std::unique_ptr(new MetalFramebuffer(rt, ds, width, height, rt_tex, ds_tex, desc));
+} }
+
+MetalSampler::MetalSampler(id ss) : m_ss(ss)
+{
+}
+
+MetalSampler::~MetalSampler() = default;
+
+void MetalSampler::SetDebugName(const std::string_view& name)
+{
+  // lame.. 
have to put it on the descriptor :/ +} + +std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& config) +{ @autoreleasepool { + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + MTLSamplerAddressModeRepeat, // Repeat + MTLSamplerAddressModeClampToEdge, // ClampToEdge + MTLSamplerAddressModeClampToBorderColor, // ClampToBorder + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> min_mag_filters = {{ + MTLSamplerMinMagFilterNearest, // Nearest + MTLSamplerMinMagFilterLinear, // Linear + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> mip_filters = {{ + MTLSamplerMipFilterNearest, // Nearest + MTLSamplerMipFilterLinear, // Linear + }}; + + struct BorderColorMapping + { + u32 color; + MTLSamplerBorderColor mtl_color; + }; + static constexpr BorderColorMapping border_color_mapping[] = { + {0x00000000u, MTLSamplerBorderColorTransparentBlack}, + {0xFF000000u, MTLSamplerBorderColorOpaqueBlack}, + {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite}, + }; + + MTLSamplerDescriptor* desc = [[[MTLSamplerDescriptor alloc] init] autorelease]; + desc.normalizedCoordinates = true; + desc.sAddressMode = ta[static_cast(config.address_u.GetValue())]; + desc.tAddressMode = ta[static_cast(config.address_v.GetValue())]; + desc.rAddressMode = ta[static_cast(config.address_w.GetValue())]; + desc.minFilter = min_mag_filters[static_cast(config.min_filter.GetValue())]; + desc.magFilter = min_mag_filters[static_cast(config.mag_filter.GetValue())]; + desc.mipFilter = (config.min_lod != config.max_lod) ? 
mip_filters[static_cast(config.mip_filter.GetValue())] : MTLSamplerMipFilterNotMipmapped; + desc.lodMinClamp = static_cast(config.min_lod); + desc.lodMaxClamp = static_cast(config.max_lod); + desc.maxAnisotropy = config.anisotropy; + + if (config.address_u == GPUSampler::AddressMode::ClampToBorder || + config.address_v == GPUSampler::AddressMode::ClampToBorder || + config.address_w == GPUSampler::AddressMode::ClampToBorder) + { + u32 i; + for (i = 0; i < static_cast(std::size(border_color_mapping)); i++) + { + if (border_color_mapping[i].color == config.border_color) + break; + } + if (i == std::size(border_color_mapping)) + { + Log_ErrorPrintf("Unsupported border color: %08X", config.border_color.GetValue()); + return {}; + } + + desc.borderColor = border_color_mapping[i].mtl_color; + } + + // TODO: Pool? + id ss = [m_device newSamplerStateWithDescriptor:desc]; + if (ss == nil) + { + Log_ErrorPrintf("Failed to create sampler state."); + return {}; + } + + return std::unique_ptr(new MetalSampler([ss retain])); +} } + + +bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, + u32 out_data_stride) +{ +#if 0 + const MetalTexture* tex = static_cast(texture); + if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat())) + return false; + + const CMetal_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), + static_cast(y + height), 1); + m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box); + + Metal_MAPPED_SUBRESOURCE sr; + HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, Metal_MAP_READ, 0, &sr); + if (FAILED(hr)) + { + Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); + return false; + } + + const u32 copy_size = tex->GetPixelSize() * width; + StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); + m_context->Unmap(m_readback_staging_texture.Get(), 0); + return true; +#else + 
return false;
+#endif
+}
+
+#if 0
+bool MetalDevice::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format)
+{
+  if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height &&
+      m_readback_staging_texture_format == format)
+    return true;
+
+  DestroyStagingBuffer();
+
+  CMetal_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, Metal_USAGE_STAGING, Metal_CPU_ACCESS_READ);
+  HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, m_readback_staging_texture.ReleaseAndGetAddressOf());
+  if (FAILED(hr))
+  {
+    Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr);
+    return false;
+  }
+
+  return true;
+}
+
+void MetalDevice::DestroyStagingBuffer()
+{
+  m_readback_staging_texture.Reset();
+  m_readback_staging_texture_width = 0;
+  m_readback_staging_texture_height = 0;
+  m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN;
+}
+#endif
+
+// A format is supported when the pixel format table maps it to something other than MTLPixelFormatInvalid.
+bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
+{
+  return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid);
+}
+
+// Copies a width x height region between two textures. Not implemented yet for Metal: after validating the
+// arguments in debug builds the function panics; the disabled section is the D3D11 code it was ported from.
+void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
+                                    GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
+                                    u32 height)
+{
+  // Horizontal extents are checked against the mip width, vertical extents against the mip height.
+  DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers());
+  DebugAssert((src_x + width) <= src->GetMipWidth(src_level));
+  DebugAssert((src_y + height) <= src->GetMipHeight(src_level));
+  DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
+  DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
+  DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
+
+#if 0
+  MetalTexture* dst11 = static_cast(dst);
+  MetalTexture* src11 = static_cast(src);
+
+  if (dst11->IsRenderTargetOrDepthStencil())
+  {
+    if (src11->GetState() == GPUTexture::State::Cleared)
+    {
+      if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight())
+      {
+        // pass clear through
+        
dst11->m_state = src11->m_state; + dst11->m_clear_value = src11->m_clear_value; + return; + } + } + else if (dst_x == 0 && dst_y == 0 && width == dst11->GetMipWidth(dst_level) && + height == dst11->GetMipHeight(dst_level)) + { + // TODO: 11.1 discard + dst11->SetState(GPUTexture::State::Dirty); + } + + dst11->CommitClear(m_context.Get()); + } + + src11->CommitClear(m_context.Get()); + + const CMetal_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), + static_cast(src_y + height), 1); + m_context->CopySubresourceRegion(dst11->GetD3DTexture(), MetalCalcSubresource(dst_level, dst_layer, dst->GetLevels()), + dst_x, dst_y, 0, src11->GetD3DTexture(), + MetalCalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); +#endif + Panic("Fixme"); +} + +void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) +{ +#if 0 + DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); + DebugAssert((src_y + height) <= src->GetMipWidth(src_level)); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipWidth(dst_level)); + DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); + + // DX11 can't resolve partial rects. 
+ Assert(src_x == dst_x && src_y == dst_y); + + MetalTexture* dst11 = static_cast(dst); + MetalTexture* src11 = static_cast(src); + + src11->CommitClear(m_context.Get()); + dst11->CommitClear(m_context.Get()); + + m_context->ResolveSubresource(dst11->GetD3DTexture(), MetalCalcSubresource(dst_level, dst_layer, dst->GetLevels()), + src11->GetD3DTexture(), MetalCalcSubresource(src_level, src_layer, src->GetLevels()), + dst11->GetDXGIFormat()); +#else + Panic("Fixme"); +#endif +} + +#if 0 + +MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) +{ +} + +MetalTextureBuffer::~MetalTextureBuffer() = default; + +bool MetalTextureBuffer::CreateBuffer(IMetalDevice* device) +{ + if (!m_buffer.Create(device, Metal_BIND_SHADER_RESOURCE, GetSizeInBytes())) + return false; + + static constexpr std::array(Format::MaxCount)> dxgi_formats = {{ + DXGI_FORMAT_R16_UINT, + }}; + + CMetal_SHADER_RESOURCE_VIEW_DESC srv_desc(m_buffer.GetD3DBuffer(), dxgi_formats[static_cast(m_format)], 0, + m_size_in_elements); + const HRESULT hr = device->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); + if (FAILED(hr)) + { + Log_ErrorPrintf("CreateShaderResourceView() failed: %08X", hr); + return false; + } + + return true; +} + +void* MetalTextureBuffer::Map(u32 required_elements) +{ + const u32 esize = GetElementSize(m_format); + const auto res = m_buffer.Map(MetalDevice::GetD3DContext(), esize, esize * required_elements); + m_current_position = res.index_aligned; + return res.pointer; +} + +void MetalTextureBuffer::Unmap(u32 used_elements) +{ + m_buffer.Unmap(MetalDevice::GetD3DContext(), used_elements * GetElementSize(m_format)); +} +#endif + +std::unique_ptr MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, + u32 size_in_elements) +{ +#if 0 + std::unique_ptr tb = std::make_unique(format, size_in_elements); + if (!tb->CreateBuffer(m_device.Get())) + tb.reset(); + + return tb; +#else 
+  return {};
+#endif
+}
+
+// Debug-group marker hook; currently a no-op in this backend.
+void MetalDevice::PushDebugGroup(const char* fmt, ...)
+{
+}
+
+// Debug-group marker hook; currently a no-op in this backend.
+void MetalDevice::PopDebugGroup()
+{
+}
+
+// Debug message hook; currently a no-op in this backend.
+void MetalDevice::InsertDebugMessage(const char* fmt, ...)
+{
+}
+
+// Reserves room for vertex_count vertices of vertex_size bytes in the vertex stream buffer.
+// If the reservation fails, the command buffer is submitted once and the reservation retried;
+// a second failure panics. Outputs the host write pointer, how many vertices fit in the
+// reserved region, and the vertex index corresponding to the write position.
+void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
+                                  u32* map_base_vertex)
+{
+  const u32 req_size = vertex_size * vertex_count;
+  if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
+  {
+    SubmitCommandBufferAndRestartRenderPass("out of vertex space");
+    if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
+      Panic("Failed to allocate vertex space");
+  }
+
+  *map_ptr = m_vertex_buffer.GetCurrentHostPointer();
+  *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
+  *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
+}
+
+// Commits vertex_size * vertex_count bytes of the region handed out by MapVertexBuffer().
+void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
+{
+  m_vertex_buffer.CommitMemory(vertex_size * vertex_count);
+}
+
+// Index-buffer counterpart of MapVertexBuffer(): reserves index_count DrawIndex entries,
+// submitting the command buffer once if the stream buffer is full, and panicking if the
+// retry also fails.
+void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
+{
+  const u32 req_size = sizeof(DrawIndex) * index_count;
+  if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
+  {
+    SubmitCommandBufferAndRestartRenderPass("out of index space");
+    if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
+      Panic("Failed to allocate index space");
+  }
+
+  *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
+  *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
+  *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
+}
+
+// Commits used_index_count indices of the region handed out by MapIndexBuffer().
+void MetalDevice::UnmapIndexBuffer(u32 used_index_count)
+{
+  m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count);
+}
+
+// Convenience wrapper: maps the uniform buffer, copies data_size bytes in, and unmaps.
+void MetalDevice::PushUniformBuffer(const void* data, u32 data_size)
+{
+  void* map = MapUniformBuffer(data_size);
+  std::memcpy(map, data, data_size);
+  UnmapUniformBuffer(data_size);
+}
+
+// Reserves size bytes (rounded up to UNIFORM_BUFFER_ALIGNMENT) in the uniform stream buffer
+// and returns the host pointer to write to. Submits the command buffer once if the buffer is
+// full; panics if the retry fails.
+void* MetalDevice::MapUniformBuffer(u32 size)
+{
+  const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
+  if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT))
+  {
+    SubmitCommandBufferAndRestartRenderPass("out of uniform space");
+    if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT))
+      Panic("Failed to allocate uniform space.");
+  }
+
+  return m_uniform_buffer.GetCurrentHostPointer();
+}
+
+// Finishes a uniform upload. The offset is captured before CommitMemory() advances it, so it
+// points at the data that was just written; the binding is refreshed if a pass is open.
+void MetalDevice::UnmapUniformBuffer(u32 size)
+{
+  m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
+  m_uniform_buffer.CommitMemory(size);
+  if (InRenderPass())
+    SetUniformBufferInRenderEncoder();
+}
+
+// Not implemented yet: the intended logic is compiled out and the call panics.
+void MetalDevice::SetFramebuffer(GPUFramebuffer* fb)
+{
+#if 0
+  if (m_current_framebuffer == fb)
+    return;
+
+  EndRenderPass();
+  m_current_framebuffer = static_cast<MetalFramebuffer*>(fb);
+#else
+  Panic("Fixme");
+#endif
+}
+
+// Not implemented yet: the intended logic is compiled out and the call panics.
+void MetalDevice::UnbindFramebuffer(MetalFramebuffer* fb)
+{
+#if 0
+  if (m_current_framebuffer != fb)
+    return;
+
+  EndRenderPass();
+  m_current_framebuffer = nullptr;
+#else
+  Panic("Fixme");
+#endif
+}
+
+// Makes pipeline the current pipeline. Inside a render pass the encoder is updated
+// immediately (depth/cull only when they actually change). Outside a pass only the cached
+// depth/cull values are refreshed, so SetInitialEncoderState() applies the state belonging
+// to this pipeline rather than to the previously bound one.
+void MetalDevice::SetPipeline(GPUPipeline* pipeline)
+{
+  if (m_current_pipeline == pipeline)
+    return;
+
+  m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
+  if (InRenderPass())
+  {
+    [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
+
+    if (m_current_depth_state != m_current_pipeline->GetDepthState())
+    {
+      m_current_depth_state = m_current_pipeline->GetDepthState();
+      [m_render_encoder setDepthStencilState:m_current_depth_state];
+    }
+    if (m_current_cull_mode != m_current_pipeline->GetCullMode())
+    {
+      m_current_cull_mode = m_current_pipeline->GetCullMode();
+      [m_render_encoder setCullMode:m_current_cull_mode];
+    }
+  }
+  else
+  {
+    // No encoder to update, but the cached state must still follow the pipeline.
+    m_current_depth_state = m_current_pipeline->GetDepthState();
+    m_current_cull_mode = m_current_pipeline->GetCullMode();
+  }
+}
+
+// Drops pl from the cached state if it is the current pipeline.
+void MetalDevice::UnbindPipeline(MetalPipeline* pl)
+{
+  if (m_current_pipeline != pl)
+    return;
+
+  m_current_pipeline = nullptr;
+}
+
+// Binds a texture/sampler pair to a fragment slot. Either may be null, which binds nil.
+// Redundant binds are filtered against the cached arrays.
+void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
+{
+  DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
+
+  id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
+  id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
+
+  if (m_current_textures[slot] != T)
+  {
+    m_current_textures[slot] = T;
+    if (InRenderPass())
+      [m_render_encoder setFragmentTexture:T atIndex:slot];
+  }
+
+  if (m_current_samplers[slot] != S)
+  {
+    m_current_samplers[slot] = S;
+    if (InRenderPass())
+      [m_render_encoder setFragmentSamplerState:S atIndex:slot];
+  }
+}
+
+// Not implemented yet: the compiled-out body is leftover from another backend; the call panics.
+void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
+{
+#if 0
+  MetalTextureBuffer* B = static_cast<MetalTextureBuffer*>(buffer);
+  m_context->PSSetShaderResources(0, 1, B->GetSRVArray());
+#else
+  Panic("Fixme");
+#endif
+}
+
+// Clears every fragment slot that currently references tex, both in the cache and, when a
+// pass is open, in the encoder.
+void MetalDevice::UnbindTexture(MetalTexture* tex)
+{
+  const id<MTLTexture> T = tex->GetMTLTexture();
+  for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
+  {
+    if (m_current_textures[i] == T)
+    {
+      m_current_textures[i] = nil;
+      if (InRenderPass())
+        [m_render_encoder setFragmentTexture:nil atIndex:i];
+    }
+  }
+}
+
+// Caches the viewport rectangle and applies it to the encoder when a pass is open.
+void MetalDevice::SetViewport(s32 x, s32 y, s32 width, s32 height)
+{
+  const Common::Rectangle<s32> new_vp = Common::Rectangle<s32>::FromExtents(x, y, width, height);
+  if (new_vp == m_current_viewport)
+    return;
+
+  m_current_viewport = new_vp;
+  if (InRenderPass())
+    SetViewportInRenderEncoder();
+}
+
+// Caches the scissor rectangle and applies it to the encoder when a pass is open.
+void MetalDevice::SetScissor(s32 x, s32 y, s32 width, s32 height)
+{
+  const Common::Rectangle<s32> new_sr = Common::Rectangle<s32>::FromExtents(x, y, width, height);
+  if (new_sr == m_current_scissor)
+    return;
+
+  m_current_scissor = new_sr;
+  if (InRenderPass())
+    SetScissorInRenderEncoder();
+}
+
+// Opens a render encoder on the current command buffer, targeting the bound framebuffer or,
+// if none is bound, the layer drawable (contents preserved with a Load action). Any open
+// inline upload encoder is closed first. The cached pipeline/buffer/texture state is
+// re-applied because a new encoder starts with nothing bound.
+void MetalDevice::BeginRenderPass()
+{ @autoreleasepool {
+  DebugAssert(m_render_encoder == nil);
+
+  // Inline writes :(
+  if (m_inline_upload_encoder != nil)
+  {
+    [m_inline_upload_encoder endEncoding];
+    [m_inline_upload_encoder release];
+    // Clear the handle, otherwise GetTextureUploadEncoder() would hand out the released encoder.
+    m_inline_upload_encoder = nil;
+  }
+
+  MTLRenderPassDescriptor* desc;
+  if (!m_current_framebuffer)
+  {
+    // Rendering to view, but we got interrupted...
+    desc = [MTLRenderPassDescriptor renderPassDescriptor];
+    desc.colorAttachments[0].texture = [m_layer_drawable texture];
+    desc.colorAttachments[0].loadAction = MTLLoadActionLoad;
+  }
+  else
+  {
+    desc = m_current_framebuffer->GetDescriptor();
+  }
+
+  // Retain to balance the release in EndRenderPass(), matching BeginPresent().
+  m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
+  SetInitialEncoderState();
+} }
+
+// Ends and releases the open render encoder.
+void MetalDevice::EndRenderPass()
+{
+  DebugAssert(InRenderPass() && !IsInlineUploading());
+  [m_render_encoder endEncoding];
+  [m_render_encoder release];
+  m_render_encoder = nil;
+}
+
+// Ends and releases the open inline upload (blit) encoder.
+void MetalDevice::EndInlineUploading()
+{
+  DebugAssert(IsInlineUploading() && !InRenderPass());
+  [m_inline_upload_encoder endEncoding];
+  [m_inline_upload_encoder release];
+  m_inline_upload_encoder = nil;
+}
+
+// Closes whichever encoder is currently open on the render command buffer, if any.
+void MetalDevice::EndAnyEncoding()
+{
+  if (InRenderPass())
+    EndRenderPass();
+  else if (IsInlineUploading())
+    EndInlineUploading();
+}
+
+// Applies all cached state to a freshly created render encoder.
+void MetalDevice::SetInitialEncoderState()
+{
+  // Set initial state.
+  // TODO: avoid uniform set here? it's probably going to get changed...
+  SetUniformBufferInRenderEncoder();
+  [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1];
+  [m_render_encoder setCullMode:m_current_cull_mode];
+  if (m_current_depth_state != nil)
+    [m_render_encoder setDepthStencilState:m_current_depth_state];
+  if (m_current_pipeline != nil)
+    [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
+  [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
+  [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
+  SetViewportInRenderEncoder();
+  SetScissorInRenderEncoder();
+}
+
+// Binds the uniform stream buffer at the current uniform offset to index 0 of both stages.
+void MetalDevice::SetUniformBufferInRenderEncoder()
+{
+  [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
+  [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
+}
+
+// Pushes the cached viewport to the encoder with a fixed 0..1 depth range.
+void MetalDevice::SetViewportInRenderEncoder()
+{
+  [m_render_encoder setViewport:(MTLViewport){
+    static_cast<double>(m_current_viewport.left), static_cast<double>(m_current_viewport.top),
+    static_cast<double>(m_current_viewport.GetWidth()), static_cast<double>(m_current_viewport.GetHeight()),
+    0.0, 1.0 }];
+}
+
+// Pushes the cached scissor rectangle to the encoder.
+void MetalDevice::SetScissorInRenderEncoder()
+{
+  [m_render_encoder setScissorRect:(MTLScissorRect){
+    static_cast<NSUInteger>(m_current_scissor.left), static_cast<NSUInteger>(m_current_scissor.top),
+    static_cast<NSUInteger>(m_current_scissor.GetWidth()), static_cast<NSUInteger>(m_current_scissor.GetHeight())
+  }];
+}
+
+// Ensures a render encoder is open before a draw is recorded.
+void MetalDevice::PreDrawCheck()
+{
+  if (!InRenderPass())
+    BeginRenderPass();
+}
+
+// Records a non-indexed draw using the current pipeline's primitive type.
+void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
+{
+  PreDrawCheck();
+  [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count];
+}
+
+// Records an indexed draw from the index stream buffer. base_index is in indices and is
+// converted to a byte offset here.
+void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
+{
+  // The index type passed to Metal below is fixed at 16 bits; keep it in step with DrawIndex.
+  static_assert(sizeof(DrawIndex) == sizeof(u16), "DrawIndex must match MTLIndexTypeUInt16");
+
+  PreDrawCheck();
+
+  // Same element size MapIndexBuffer() uses when reporting map_base_index.
+  const u32 index_offset = base_index * sizeof(DrawIndex);
+  [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() indexCount:index_count indexType:MTLIndexTypeUInt16 indexBuffer:m_index_buffer.GetBuffer() indexBufferOffset:index_offset instanceCount:1 baseVertex:base_vertex baseInstance:0];
+}
+
+// Returns a blit encoder for texture uploads.
+//  - is_inline == false: encoder on the separate upload command buffer, which is created on
+//    first use and committed ahead of the render command buffer in SubmitCommandBuffer().
+//  - is_inline == true: encoder on the render command buffer; any open render pass is ended first.
+id<MTLBlitCommandEncoder> MetalDevice::GetTextureUploadEncoder(bool is_inline)
+{ @autoreleasepool {
+  if (!is_inline)
+  {
+    if (!m_upload_cmdbuf)
+    {
+      m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
+      m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain];
+      [m_upload_encoder setLabel:@"Upload Encoder"];
+    }
+    return m_upload_encoder;
+  }
+
+  // Interleaved with draws.
+  if (m_inline_upload_encoder != nil)
+    return m_inline_upload_encoder;
+
+  if (InRenderPass())
+    EndRenderPass();
+  m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain];
+  return m_inline_upload_encoder;
+} }
+
+// Acquires the next layer drawable and opens a clearing render pass on it.
+// Returns false (nothing to present) when skip_present is set, there is no layer, or no
+// drawable could be obtained.
+bool MetalDevice::BeginPresent(bool skip_present)
+{ @autoreleasepool {
+  if (skip_present || m_layer == nil)
+    return false;
+
+  EndAnyEncoding();
+
+  m_layer_drawable = [[m_layer nextDrawable] retain];
+  if (m_layer_drawable == nil)
+    return false;
+
+  // Set up rendering to layer.
+  id<MTLTexture> layer_texture = [m_layer_drawable texture];
+  m_current_framebuffer = nullptr;
+  m_layer_pass_desc.colorAttachments[0].texture = layer_texture;
+  m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;
+  m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain];
+  SetInitialEncoderState();
+  return true;
+} }
+
+// Schedules presentation of the drawable acquired in BeginPresent() and submits the frame.
+void MetalDevice::EndPresent()
+{
+  DebugAssert(!m_current_framebuffer);
+  EndAnyEncoding();
+
+  [m_render_cmdbuf presentDrawable:m_layer_drawable];
+  [m_layer_drawable release];
+  // Don't keep a dangling handle around; BeginRenderPass() reads m_layer_drawable.
+  m_layer_drawable = nil;
+  SubmitCommandBuffer();
+}
+
+// Creates the next render command buffer, assigns it a new fence counter value, and
+// registers a completion handler that reports that value to CommandBufferCompleted().
+void MetalDevice::CreateCommandBuffer()
+{ @autoreleasepool {
+  DebugAssert(m_render_cmdbuf == nil);
+  const u64 fence_counter = ++m_current_fence_counter;
+  m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
+  [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer>) {
+    CommandBufferCompleted(fence_counter);
+  }];
+} }
+
+// Completion handler target: raises the completed fence counter to at least fence_counter.
+// The load/max/store sequence is performed while holding m_fence_mutex.
+void MetalDevice::CommandBufferCompleted(u64 fence_counter)
+{
+  std::unique_lock lock(m_fence_mutex);
+  m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), std::memory_order_release);
+}
+
+// Commits the pending upload command buffer (if any) followed by the render command buffer,
+// then creates a fresh render command buffer for subsequent work.
+void MetalDevice::SubmitCommandBuffer()
+{
+  if (m_upload_cmdbuf != nil)
+  {
+    [m_upload_encoder endEncoding];
+    [m_upload_encoder release];
+    m_upload_encoder = nil;
+    [m_upload_cmdbuf commit];
+    [m_upload_cmdbuf release];
+    m_upload_cmdbuf = nil;
+  }
+
+  if (m_render_cmdbuf != nil)
+  {
+    // Close whichever encoder (render or inline upload) is still open before committing.
+    EndAnyEncoding();
+
+    [m_render_cmdbuf commit];
+    [m_render_cmdbuf release];
+    m_render_cmdbuf = nil;
+  }
+
+  CreateCommandBuffer();
+}
+
+// Submits the current command buffer and, if a render pass was open when called, reopens one
+// on the new command buffer. reason is only used for the log message.
+void MetalDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason)
+{
+  Log_DevPrintf("Submitting command buffer and restarting render pass due to %s", reason);
+
+  const bool in_render_pass = InRenderPass();
+  SubmitCommandBuffer();
+  if (in_render_pass)
+    BeginRenderPass();
+}
+
+// Blocks the calling thread until the completed fence counter reaches at least counter.
+// Returns immediately if it already has. Otherwise polls the counter, yielding the thread
+// between checks; m_fence_mutex is dropped while yielding and re-taken before each check.
+void MetalDevice::WaitForFenceCounter(u64 counter)
+{
+  if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter)
+    return;
+
+  // TODO: There has to be a better way to do this..
+  std::unique_lock lock(m_fence_mutex);
+  while (m_completed_fence_counter.load(std::memory_order_acquire) < counter)
+  {
+    lock.unlock();
+    pthread_yield_np();
+    lock.lock();
+  }
+}
+
+// Factory used by code that cannot include the Objective-C++ header: returns a new
+// MetalDevice behind the generic GPUDevice interface.
+std::unique_ptr<GPUDevice> WrapNewMetalDevice()
+{
+  return std::unique_ptr<GPUDevice>(new MetalDevice());
+}
diff --git a/src/core/gpu/metal_stream_buffer.h b/src/core/gpu/metal_stream_buffer.h
new file mode 100644
index 000000000..d3a71223a
--- /dev/null
+++ b/src/core/gpu/metal_stream_buffer.h
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: 2023 Connor McLaughlin
+// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
+
+#pragma once
+
+#include "common/types.h"
+
+#include <deque> // NOTE(review): header names were lost from these two #include lines in this copy;
+#include <utility> // <deque>/<utility> cover the std::deque of pairs this header declares - confirm.
+
+#ifndef __OBJC__
+#error This file needs to be compiled with Objective C++.
+#endif
+
+#if __has_feature(objc_arc)
+#error ARC should not be enabled.
+#endif + +#include +#include + +class MetalStreamBuffer +{ +public: + MetalStreamBuffer(); + MetalStreamBuffer(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer(const MetalStreamBuffer&) = delete; + ~MetalStreamBuffer(); + + MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete; + + ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); } + ALWAYS_INLINE id GetBuffer() const { return m_buffer; } + ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } + ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } + ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } + ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } + ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } + + bool Create(id device, u32 size); + void Destroy(); + + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); + +private: + bool AllocateBuffer(u32 size); + void UpdateCurrentFencePosition(); + void UpdateGPUPosition(); + + // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
+ bool WaitForClearSpace(u32 num_bytes); + + u32 m_size = 0; + u32 m_current_offset = 0; + u32 m_current_space = 0; + u32 m_current_gpu_position = 0; + + id m_buffer = nil; + u8* m_host_pointer = nullptr; + + // List of fences and the corresponding positions in the buffer + std::deque> m_tracked_fences; +}; diff --git a/src/core/gpu/metal_stream_buffer.mm b/src/core/gpu/metal_stream_buffer.mm new file mode 100644 index 000000000..7aa4d75f6 --- /dev/null +++ b/src/core/gpu/metal_stream_buffer.mm @@ -0,0 +1,253 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "metal_stream_buffer.h" +#include "metal_device.h" + +#include "common/align.h" +#include "common/assert.h" +#include "common/log.h" + +Log_SetChannel(MetalDevice); + +MetalStreamBuffer::MetalStreamBuffer() = default; + +MetalStreamBuffer::~MetalStreamBuffer() +{ + if (IsValid()) + Destroy(); +} + +bool MetalStreamBuffer::Create(id device, u32 size) +{ @autoreleasepool { + const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + + id new_buffer = [device newBufferWithLength:size options:options]; + if (new_buffer == nil) + { + Log_ErrorPrintf("Failed to create buffer."); + return false; + } + + if (IsValid()) + Destroy(); + + // Replace with the new buffer + m_size = size; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + m_buffer = [new_buffer retain]; + m_host_pointer = static_cast([new_buffer contents]); + return true; +} } + +void MetalStreamBuffer::Destroy() +{ + m_size = 0; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + [m_buffer release]; + m_buffer = nil; + m_host_pointer = nullptr; +} + +bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) +{ + const u32 required_bytes = num_bytes + alignment; + + // Check for sane allocations + if (required_bytes > m_size) + { + Log_ErrorPrintf("Attempting to 
allocate %u bytes from a %u byte stream buffer", static_cast(num_bytes), + static_cast(m_size)); + Panic("Stream buffer overflow"); + return false; + } + + UpdateGPUPosition(); + + // Is the GPU behind or up to date with our current offset? + if (m_current_offset >= m_current_gpu_position) + { + const u32 remaining_bytes = m_size - m_current_offset; + if (required_bytes <= remaining_bytes) + { + // Place at the current position, after the GPU position. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_size - m_current_offset; + return true; + } + + // Check for space at the start of the buffer + // We use < here because we don't want to have the case of m_current_offset == + // m_current_gpu_position. That would mean the code above would assume the + // GPU has caught up to us, which it hasn't. + if (required_bytes < m_current_gpu_position) + { + // Reset offset to zero, since we're allocating behind the gpu now + m_current_offset = 0; + m_current_space = m_current_gpu_position - 1; + return true; + } + } + + // Is the GPU ahead of our current offset? + if (m_current_offset < m_current_gpu_position) + { + // We have from m_current_offset..m_current_gpu_position space to use. + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; + if (required_bytes < remaining_bytes) + { + // Place at the current position, since this is still behind the GPU. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_current_gpu_position - m_current_offset - 1; + return true; + } + } + + // Can we find a fence to wait on that will give us enough memory? + if (WaitForClearSpace(required_bytes)) + { + const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; + m_current_offset += align_diff; + m_current_space -= align_diff; + return true; + } + + // We tried everything we could, and still couldn't get anything. 
This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. + return false; +} + +void MetalStreamBuffer::CommitMemory(u32 final_num_bytes) +{ + DebugAssert((m_current_offset + final_num_bytes) <= m_size); + DebugAssert(final_num_bytes <= m_current_space); + + m_current_offset += final_num_bytes; + m_current_space -= final_num_bytes; + UpdateCurrentFencePosition(); +} + +void MetalStreamBuffer::UpdateCurrentFencePosition() +{ + // Has the offset changed since the last fence? + const u64 counter = MetalDevice::GetCurrentFenceCounter(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) + { + // Still haven't executed a command buffer, so just update the offset. + m_tracked_fences.back().second = m_current_offset; + return; + } + + // New buffer, so update the GPU position while we're at it. + m_tracked_fences.emplace_back(counter, m_current_offset); +} + +void MetalStreamBuffer::UpdateGPUPosition() +{ + auto start = m_tracked_fences.begin(); + auto end = start; + + const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); + while (end != m_tracked_fences.end() && completed_counter >= end->first) + { + m_current_gpu_position = end->second; + ++end; + } + + if (start != end) + { + m_tracked_fences.erase(start, end); + if (m_current_offset == m_current_gpu_position) + { + // GPU is all caught up now. + m_current_offset = 0; + m_current_gpu_position = 0; + m_current_space = m_size; + } + } +} + +bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes) +{ + u32 new_offset = 0; + u32 new_space = 0; + u32 new_gpu_position = 0; + + auto iter = m_tracked_fences.begin(); + for (; iter != m_tracked_fences.end(); ++iter) + { + // Would this fence bring us in line with the GPU? 
+ // This is the "last resort" case, where a command buffer execution has been forced + // after no additional data has been written to it, so we can assume that after the + // fence has been signaled the entire buffer is now consumed. + u32 gpu_position = iter->second; + if (m_current_offset == gpu_position) + { + new_offset = 0; + new_space = m_size; + new_gpu_position = 0; + break; + } + + // Assuming that we wait for this fence, are we allocating in front of the GPU? + if (m_current_offset > gpu_position) + { + // This would suggest the GPU has now followed us and wrapped around, so we have from + // m_current_position..m_size free, as well as and 0..gpu_position. + const u32 remaining_space_after_offset = m_size - m_current_offset; + if (remaining_space_after_offset >= num_bytes) + { + // Switch to allocating in front of the GPU, using the remainder of the buffer. + new_offset = m_current_offset; + new_space = m_size - m_current_offset; + new_gpu_position = gpu_position; + break; + } + + // We can wrap around to the start, behind the GPU, if there is enough space. + // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. + if (gpu_position > num_bytes) + { + new_offset = 0; + new_space = gpu_position - 1; + new_gpu_position = gpu_position; + break; + } + } + else + { + // We're currently allocating behind the GPU. This would give us between the current + // offset and the GPU position worth of space to work with. Again, > because we can't + // align the GPU position with the buffer offset. + u32 available_space_inbetween = gpu_position - m_current_offset; + if (available_space_inbetween > num_bytes) + { + // Leave the offset as-is, but update the GPU position. + new_offset = m_current_offset; + new_space = available_space_inbetween - 1; + new_gpu_position = gpu_position; + break; + } + } + } + + // Did any fences satisfy this condition? 
+ // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) + return false; + + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. + MetalDevice::GetInstance().WaitForFenceCounter(iter->first); + m_tracked_fences.erase( + m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); + m_current_offset = new_offset; + m_current_space = new_space; + m_current_gpu_position = new_gpu_position; + return true; +} diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index fea6bdf80..453cb85cb 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2654,6 +2654,24 @@ std::unique_ptr GPU::CreateHardwareD3D11Renderer() #endif +#ifdef __APPLE__ + +std::unique_ptr GPU::CreateHardwareMetalRenderer() +{ + if (!Host::AcquireHostDisplay(RenderAPI::Metal)) + { + Log_ErrorPrintf("Host render API is incompatible"); + return nullptr; + } + + std::unique_ptr gpu(std::make_unique()); + if (!gpu->Initialize()) + return nullptr; + + return gpu; +} + +#endif std::unique_ptr GPU::CreateHardwareOpenGLRenderer() { diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index cf0103d22..4e2380308 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1162,6 +1162,8 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo) ss << "layout(std430"; if (IsVulkan()) ss << ", set = 0, binding = 0"; + else if (IsMetal()) + ss << ", set = 0, binding = 1"; else if (m_use_glsl_binding_layout) ss << ", binding = 0"; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index cae6d18dc..90067f4bf 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -883,6 +883,9 @@ static constexpr auto s_gpu_renderer_names = make_array( #ifdef _WIN32 "D3D11", "D3D12", #endif +#ifdef __APPLE__ + "Metal", +#endif #ifdef WITH_VULKAN 
"Vulkan", #endif @@ -894,6 +897,9 @@ static constexpr auto s_gpu_renderer_display_names = make_array( #ifdef _WIN32 TRANSLATABLE("GPURenderer", "Hardware (D3D11)"), TRANSLATABLE("GPURenderer", "Hardware (D3D12)"), #endif +#ifdef __APPLE__ + TRANSLATABLE("GPURenderer", "Hardware (Metal)"), +#endif #ifdef WITH_VULKAN TRANSLATABLE("GPURenderer", "Hardware (Vulkan)"), #endif diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index 2035a5964..7a425b59e 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -15,6 +15,7 @@ Log_SetChannel(ShaderGen); ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend) : m_render_api(render_api), m_glsl(render_api != RenderAPI::D3D11 && render_api != RenderAPI::D3D12), + m_spirv(render_api == RenderAPI::Vulkan || render_api == RenderAPI::Metal), m_supports_dual_source_blend(supports_dual_source_blend), m_use_glsl_interface_blocks(false) { #if defined(WITH_OPENGL) || defined(WITH_VULKAN) @@ -24,8 +25,8 @@ ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend) if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) SetGLSLVersionString(); - m_use_glsl_interface_blocks = (IsVulkan() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2); - m_use_glsl_binding_layout = (IsVulkan() || UseGLSLBindingLayout()); + m_use_glsl_interface_blocks = (IsVulkan() || IsMetal() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_VERSION_3_2); + m_use_glsl_binding_layout = (IsVulkan() || IsMetal() || UseGLSLBindingLayout()); if (m_render_api == RenderAPI::OpenGL) { @@ -109,7 +110,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) { if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) ss << m_glsl_version_string << "\n\n"; - else if (m_render_api == RenderAPI::Vulkan) + else if (m_spirv) ss << "#version 450 core\n\n"; #ifdef WITH_OPENGL @@ -157,6 +158,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) DefineMacro(ss, "API_D3D11", m_render_api == 
RenderAPI::D3D11); DefineMacro(ss, "API_D3D12", m_render_api == RenderAPI::D3D12); DefineMacro(ss, "API_VULKAN", m_render_api == RenderAPI::Vulkan); + DefineMacro(ss, "API_METAL", m_render_api == RenderAPI::Metal); #ifdef WITH_OPENGL if (m_render_api == RenderAPI::OpenGLES) @@ -275,6 +277,10 @@ void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_c else ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n"; } + else if (IsMetal()) + { + ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n"; + } else if (m_glsl) { if (m_use_glsl_binding_layout) @@ -343,7 +349,7 @@ const char* ShaderGen::GetInterpolationQualifier(bool interface_block, bool cent #else const bool shading_language_420pack = false; #endif - if (m_glsl && interface_block && (!IsVulkan() && !shading_language_420pack)) + if (m_glsl && interface_block && (!m_spirv && !shading_language_420pack)) { return (sample_interpolation ? (is_out ? "sample out " : "sample in ") : (centroid_interpolation ? (is_out ? 
"centroid out " : "centroid in ") : "")); @@ -381,7 +387,7 @@ void ShaderGen::DeclareVertexEntryPoint( { const char* qualifier = GetInterpolationQualifier(true, msaa, ssaa, true); - if (IsVulkan()) + if (m_spirv) ss << "layout(location = 0) "; ss << "out VertexData" << output_block_suffix << " {\n"; @@ -418,7 +424,7 @@ void ShaderGen::DeclareVertexEntryPoint( ss << "#define v_pos gl_Position\n\n"; if (declare_vertex_id) { - if (IsVulkan()) + if (m_spirv) ss << "#define v_id uint(gl_VertexIndex)\n"; else ss << "#define v_id uint(gl_VertexID)\n"; @@ -475,7 +481,7 @@ void ShaderGen::DeclareFragmentEntryPoint( { const char* qualifier = GetInterpolationQualifier(true, msaa, ssaa, false); - if (IsVulkan()) + if (m_spirv) ss << "layout(location = 0) "; ss << "in VertexData {\n"; diff --git a/src/core/shadergen.h b/src/core/shadergen.h index 11c015b8b..c1710a068 100644 --- a/src/core/shadergen.h +++ b/src/core/shadergen.h @@ -28,6 +28,7 @@ public: protected: ALWAYS_INLINE bool IsVulkan() const { return (m_render_api == RenderAPI::Vulkan); } + ALWAYS_INLINE bool IsMetal() const { return (m_render_api == RenderAPI::Metal); } const char* GetInterpolationQualifier(bool interface_block, bool centroid_interpolation, bool sample_interpolation, bool is_out) const; @@ -56,6 +57,7 @@ protected: RenderAPI m_render_api; bool m_glsl; + bool m_spirv; bool m_supports_dual_source_blend; bool m_use_glsl_interface_blocks; bool m_use_glsl_binding_layout; diff --git a/src/core/system.cpp b/src/core/system.cpp index 84ac4b3ea..91107cf89 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -1644,6 +1644,12 @@ bool System::CreateGPU(GPURenderer renderer) break; #endif +#ifdef __APPLE__ + case GPURenderer::HardwareMetal: + g_gpu = GPU::CreateHardwareMetalRenderer(); + break; +#endif + case GPURenderer::Software: default: g_gpu = GPU::CreateSoftwareRenderer(); diff --git a/src/core/types.h b/src/core/types.h index 49ede88a5..3092d25f4 100644 --- a/src/core/types.h +++ b/src/core/types.h 
@@ -62,6 +62,9 @@ enum class GPURenderer : u8 HardwareD3D11, HardwareD3D12, #endif +#ifdef __APPLE__ + HardwareMetal, +#endif #ifdef WITH_VULKAN HardwareVulkan, #endif