diff --git a/.clang-format b/.clang-format index 59cf6e97d..10ff7f094 100644 --- a/.clang-format +++ b/.clang-format @@ -105,4 +105,111 @@ Standard: Cpp11 TabWidth: 2 UseTab: Never ... +--- +Language: ObjC +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: InlineOnly +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 2 +UseTab: Never +... diff --git a/src/core/gpu/gpu_device.cpp b/src/core/gpu/gpu_device.cpp index c07a9fb3a..501e0f27a 100644 --- a/src/core/gpu/gpu_device.cpp +++ b/src/core/gpu/gpu_device.cpp @@ -470,7 +470,7 @@ void GPUDevice::RenderImGui() const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; DebugAssert(!pcmd->UserCallback); - if (pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x) + if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x) continue; SetScissor(static_cast(pcmd->ClipRect.x), static_cast(pcmd->ClipRect.y), diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index 3672c5ccc..a911718a5 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -476,8 +476,8 @@ public: virtual RenderAPI GetRenderAPI() const = 0; - bool Create(const std::string_view& adapter, const std::string_view& shader_cache_path, - bool debug_device, bool vsync); + bool Create(const std::string_view& adapter, const std::string_view& shader_cache_path, bool debug_device, + bool vsync); void Destroy(); virtual bool HasSurface() const = 0; diff --git a/src/core/gpu/gpu_texture.cpp b/src/core/gpu/gpu_texture.cpp index 12686c1bb..fd4bb2207 100644 --- a/src/core/gpu/gpu_texture.cpp +++ b/src/core/gpu/gpu_texture.cpp @@ -28,7 +28,7 @@ void GPUTexture::ClearBaseProperties() m_state = State::Dirty; } -u32 GPUTexture::GPUTexture::GetPixelSize(GPUTexture::Format format) +u32 GPUTexture::GetPixelSize(GPUTexture::Format format) { switch (format) { @@ -59,7 +59,7 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector temp(width * height); @@ -102,7 +102,7 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector temp(width * height); diff --git a/src/core/gpu/metal_device.h b/src/core/gpu/metal_device.h index 11f66156f..f2bb7841c 100644 --- a/src/core/gpu/metal_device.h +++ b/src/core/gpu/metal_device.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -42,14 +43,14 @@ class MetalSampler final : public GPUSampler public: ~MetalSampler() override; - ALWAYS_INLINE id GetSamplerState() const { return m_ss; } + ALWAYS_INLINE id GetSamplerState() const { return m_ss; } void SetDebugName(const std::string_view& name) override; private: MetalSampler(id ss); - id m_ss; + id m_ss; }; class MetalShader final : public GPUShader @@ -59,16 +60,16 @@ class MetalShader final : public GPUShader public: ~MetalShader() override; - ALWAYS_INLINE id GetLibrary() const { return m_library; } - ALWAYS_INLINE id GetFunction() const { return m_function; } + ALWAYS_INLINE id GetLibrary() const { return m_library; } + ALWAYS_INLINE id GetFunction() const { return m_function; } void SetDebugName(const std::string_view& name) override; private: MetalShader(GPUShaderStage stage, id library, id function); - id m_library; - id m_function; + id m_library; + id m_function; }; class MetalPipeline final : public GPUPipeline @@ -77,21 +78,22 @@ class MetalPipeline final : public GPUPipeline public: ~MetalPipeline() override; - - ALWAYS_INLINE id GetPipelineState() const { return m_pipeline; } - ALWAYS_INLINE id GetDepthState() const { return m_depth; } - ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } - ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } + + ALWAYS_INLINE id GetPipelineState() const { return m_pipeline; } + ALWAYS_INLINE id GetDepthState() const { return m_depth; } + ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } + ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } void SetDebugName(const std::string_view& name) override; private: - MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive); - - id m_pipeline; - id m_depth; - MTLCullMode m_cull_mode; - MTLPrimitiveType m_primitive; + MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, + MTLPrimitiveType primitive); + + id m_pipeline; + id m_depth; + MTLCullMode m_cull_mode; + MTLPrimitiveType m_primitive; }; class MetalTexture final : public GPUTexture @@ -101,7 +103,7 @@ class MetalTexture final : public GPUTexture public: ~MetalTexture(); - ALWAYS_INLINE id GetMTLTexture() const { return m_texture; } + ALWAYS_INLINE id GetMTLTexture() const { return m_texture; } bool Create(id device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0); @@ -113,72 +115,69 @@ public: bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; void Unmap() override; + void MakeReadyForSampling() override; + void SetDebugName(const std::string_view& name) override; private: - MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, - Format format); - - id m_texture; + MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, + Format format); - u16 m_map_x = 0; - u16 m_map_y = 0; + id m_texture; + + u16 m_map_x = 0; + u16 m_map_y = 0; u16 m_map_width = 0; - u16 m_map_height = 0; - u8 m_map_layer = 0; - u8 m_map_level = 0; + u16 m_map_height = 0; + u8 m_map_layer = 0; + u8 m_map_level = 0; }; -#if 0 class MetalTextureBuffer final : public GPUTextureBuffer { public: MetalTextureBuffer(Format format, u32 size_in_elements); ~MetalTextureBuffer() override; - ALWAYS_INLINE IMetalBuffer* GetBuffer() const { return m_buffer.GetD3DBuffer(); } - ALWAYS_INLINE IMetalShaderResourceView* GetSRV() const { return m_srv.Get(); } - ALWAYS_INLINE IMetalShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); } + ALWAYS_INLINE id GetMTLBuffer() const { return m_buffer.GetBuffer(); } - bool CreateBuffer(IMetalDevice* device); + bool CreateBuffer(id device); // Inherited via GPUTextureBuffer - virtual void* Map(u32 required_elements) override; - virtual void Unmap(u32 used_elements) override; + void* Map(u32 required_elements) override; + void Unmap(u32 used_elements) override; private: MetalStreamBuffer m_buffer; - Microsoft::WRL::ComPtr m_srv; }; -#endif class MetalFramebuffer final : public GPUFramebuffer { - friend MetalDevice; + friend MetalDevice; public: - ~MetalFramebuffer() override; - - MTLRenderPassDescriptor* GetDescriptor() const; + ~MetalFramebuffer() override; - void SetDebugName(const std::string_view& name) override; + MTLRenderPassDescriptor* GetDescriptor() const; + + void SetDebugName(const std::string_view& name) override; private: - MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, id ds_tex, - MTLRenderPassDescriptor* descriptor); + MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, id ds_tex, + MTLRenderPassDescriptor* descriptor); - id m_rt_tex; - id m_ds_tex; - MTLRenderPassDescriptor* m_descriptor; + id m_rt_tex; + id m_ds_tex; + MTLRenderPassDescriptor* m_descriptor; }; class MetalDevice final : public GPUDevice { public: - ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_host_display.get()); } + ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast(g_gpu_device.get()); } ALWAYS_INLINE static id GetMTLDevice() { return GetInstance().m_device; } - ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } - ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } + ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } + ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } MetalDevice(); ~MetalDevice(); @@ -187,17 +186,9 @@ public: bool HasSurface() const override; - bool CreateDevice(const WindowInfo& wi, bool vsync) override; - bool SetupDevice() override; + bool UpdateWindow() override; + void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; - bool MakeCurrent() override; - bool DoneCurrent() override; - - bool ChangeWindow(const WindowInfo& new_wi) override; - void ResizeWindow(s32 new_window_width, s32 new_window_height) override; - bool SupportsFullscreen() const override; - bool IsFullscreen() override; - bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override; AdapterAndModeList GetAdapterAndModeList() override; void DestroySurface() override; @@ -258,59 +249,73 @@ public: bool BeginPresent(bool skip_present) override; void EndPresent() override; - void WaitForFenceCounter(u64 counter); - - ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; } - id GetTextureUploadEncoder(bool is_inline); - - void SubmitCommandBuffer(); - void SubmitCommandBufferAndRestartRenderPass(const char* reason); - + void WaitForFenceCounter(u64 counter); + + ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; } + id GetBlitEncoder(bool is_inline); + + void SubmitCommandBuffer(bool wait_for_completion = false); + void SubmitCommandBufferAndRestartRenderPass(const char* reason); + + void CommitClear(MetalTexture* tex); + void UnbindFramebuffer(MetalFramebuffer* fb); + void UnbindFramebuffer(MetalTexture* tex); void UnbindPipeline(MetalPipeline* pl); void UnbindTexture(MetalTexture* tex); + void UnbindTextureBuffer(MetalTextureBuffer* buf); + + static void DeferRelease(id obj); + static void DeferRelease(u64 fence_counter, id obj); static AdapterAndModeList StaticGetAdapterAndModeList(); +protected: + bool CreateDevice(const std::string_view& adapter, bool debug_device) override; + void DestroyDevice() override; + private: static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; - static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32/*16*/ * 1024 * 1024; // TODO reduce after separate allocations + static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; - - using DepthStateMap = std::unordered_map>; - ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } - + using DepthStateMap = std::unordered_map>; + + ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } + void SetFeatures(); - - std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, const std::string_view& entry_point); - - id GetDepthState(const GPUPipeline::DepthState& ds); - - void CreateCommandBuffer(); - void CommandBufferCompleted(u64 fence_counter); - - ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } - ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } - void BeginRenderPass(); - void EndRenderPass(); - void EndInlineUploading(); - void EndAnyEncoding(); + std::unique_ptr CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, + const std::string_view& entry_point); + + id GetDepthState(const GPUPipeline::DepthState& ds); + + void CreateCommandBuffer(); + void CommandBufferCompletedOffThread(u64 fence_counter); + void WaitForPreviousCommandBuffers(); + void CleanupObjects(); + + ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } + ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } + void BeginRenderPass(); + void EndRenderPass(); + void EndInlineUploading(); + void EndAnyEncoding(); + + Common::Rectangle ClampToFramebufferSize(const Common::Rectangle& rc) const; void PreDrawCheck(); - void SetInitialEncoderState(); - void SetUniformBufferInRenderEncoder(); - void SetViewportInRenderEncoder(); - void SetScissorInRenderEncoder(); + void SetInitialEncoderState(); + void SetUniformBufferInRenderEncoder(); + void SetViewportInRenderEncoder(); + void SetScissorInRenderEncoder(); - //bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); - //void DestroyStagingBuffer(); + bool CheckDownloadBufferSize(u32 required_size); - bool CreateLayer(); - void DestroyLayer(); + bool CreateLayer(); + void DestroyLayer(); bool CreateBuffers(); void DestroyBuffers(); @@ -320,54 +325,54 @@ private: void PopTimestampQuery(); void KickTimestampQuery(); - id m_device; - id m_queue; - - CAMetalLayer* m_layer = nil; - id m_layer_drawable = nil; - MTLRenderPassDescriptor* m_layer_pass_desc = nil; - - std::mutex m_fence_mutex; - u64 m_current_fence_counter = 0; - std::atomic m_completed_fence_counter{0}; - - DepthStateMap m_depth_states; + id m_device; + id m_queue; -// ComPtr m_readback_staging_texture; -// DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; -// u32 m_readback_staging_texture_width = 0; -// u32 m_readback_staging_texture_height = 0; + CAMetalLayer* m_layer = nil; + id m_layer_drawable = nil; + MTLRenderPassDescriptor* m_layer_pass_desc = nil; + + std::mutex m_fence_mutex; + u64 m_current_fence_counter = 0; + std::atomic m_completed_fence_counter{0}; + std::deque> m_cleanup_objects; // [fence_counter, object] + + DepthStateMap m_depth_states; + + id m_download_buffer = nil; + u32 m_download_buffer_size = 0; MetalStreamBuffer m_vertex_buffer; MetalStreamBuffer m_index_buffer; MetalStreamBuffer m_uniform_buffer; - MetalStreamBuffer m_texture_upload_buffer; - - id m_upload_cmdbuf = nil; - id m_upload_encoder = nil; - id m_inline_upload_encoder = nil; - - id m_render_cmdbuf = nil; - id m_render_encoder = nil; + MetalStreamBuffer m_texture_upload_buffer; + + id m_upload_cmdbuf = nil; + id m_upload_encoder = nil; + id m_inline_upload_encoder = nil; + + id m_render_cmdbuf = nil; + id m_render_encoder = nil; MetalFramebuffer* m_current_framebuffer = nullptr; - MetalPipeline* m_current_pipeline = nullptr; - id m_current_depth_state = nil; - MTLCullMode m_current_cull_mode = MTLCullModeNone; - u32 m_current_uniform_buffer_position = 0; + MetalPipeline* m_current_pipeline = nullptr; + id m_current_depth_state = nil; + MTLCullMode m_current_cull_mode = MTLCullModeNone; + u32 m_current_uniform_buffer_position = 0; - std::array, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; - std::array, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; - Common::Rectangle m_current_viewport = {}; - Common::Rectangle m_current_scissor = {}; - - bool m_vsync_enabled = false; + std::array, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; + std::array, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; + id m_current_ssbo = nil; + Common::Rectangle m_current_viewport = {}; + Common::Rectangle m_current_scissor = {}; -// std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; -// u8 m_read_timestamp_query = 0; -// u8 m_write_timestamp_query = 0; -// u8 m_waiting_timestamp_queries = 0; -// bool m_timestamp_query_started = false; -// float m_accumulated_gpu_time = 0.0f; + bool m_vsync_enabled = false; + + // std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; + // u8 m_read_timestamp_query = 0; + // u8 m_write_timestamp_query = 0; + // u8 m_waiting_timestamp_queries = 0; + // bool m_timestamp_query_started = false; + // float m_accumulated_gpu_time = 0.0f; }; diff --git a/src/core/gpu/metal_device.mm b/src/core/gpu/metal_device.mm index 031fddd68..22f2fb993 100644 --- a/src/core/gpu/metal_device.mm +++ b/src/core/gpu/metal_device.mm @@ -2,9 +2,9 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "metal_device.h" -#include "spirv_compiler.h" #include "../host_settings.h" #include "../shader_cache_version.h" +#include "spirv_compiler.h" #include "common/align.h" #include "common/assert.h" @@ -17,12 +17,14 @@ #define FMT_EXCEPTIONS 0 #include "fmt/format.h" -#include #include +#include #include Log_SetChannel(MetalDevice); +// TODO: Disable hazard tracking and issue barriers explicitly. + static constexpr MTLPixelFormat LAYER_MTL_PIXEL_FORMAT = MTLPixelFormatRGBA8Unorm; static constexpr GPUTexture::Format LAYER_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; @@ -35,13 +37,13 @@ static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; static constexpr std::array(GPUTexture::Format::Count)> s_pixel_format_mapping = { - MTLPixelFormatInvalid, // Unknown - MTLPixelFormatRGBA8Unorm, // RGBA8 - MTLPixelFormatBGRA8Unorm, // BGRA8 - MTLPixelFormatB5G6R5Unorm, // RGB565 - MTLPixelFormatA1BGR5Unorm,// RGBA5551 - MTLPixelFormatR8Unorm, // R8 - MTLPixelFormatDepth16Unorm, // D16 + MTLPixelFormatInvalid, // Unknown + MTLPixelFormatRGBA8Unorm, // RGBA8 + MTLPixelFormatBGRA8Unorm, // BGRA8 + MTLPixelFormatB5G6R5Unorm, // RGB565 + MTLPixelFormatA1BGR5Unorm, // RGBA5551 + MTLPixelFormatR8Unorm, // R8 + MTLPixelFormatDepth16Unorm, // D16 }; static constexpr std::array s_clear_color = {}; @@ -50,48 +52,42 @@ static unsigned s_next_bad_shader_id = 1; static NSString* StringViewToNSString(const std::string_view& str) { - if (str.empty()) - return nil; - - return [[[NSString alloc] autorelease] initWithBytes:str.data() length:static_cast(str.length()) encoding:NSUTF8StringEncoding]; + if (str.empty()) + return nil; + + return [[[NSString alloc] autorelease] initWithBytes:str.data() + length:static_cast(str.length()) + encoding:NSUTF8StringEncoding]; } static void LogNSError(NSError* error, const char* desc, ...) { - std::va_list ap; - va_start(ap, desc); - Log::Writev("MetalDevice", "", LOGLEVEL_ERROR, desc, ap); - va_end(ap); + std::va_list ap; + va_start(ap, desc); + Log::Writev("MetalDevice", "", LOGLEVEL_ERROR, desc, ap); + va_end(ap); - Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Code: %u", static_cast(error.code)); - Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Description: %s", [error.description UTF8String]); + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Code: %u", static_cast(error.code)); + Log::Writef("MetalDevice", "", LOGLEVEL_ERROR, " NSError Description: %s", [error.description UTF8String]); } template static void RunOnMainThread(F&& f) { - if ([NSThread isMainThread]) - f(); - else - dispatch_sync(dispatch_get_main_queue(), f); + if ([NSThread isMainThread]) + f(); + else + dispatch_sync(dispatch_get_main_queue(), f); } -MetalDevice::MetalDevice() -: m_current_viewport(0, 0, 1, 1) -, m_current_scissor(0, 0, 1, 1) +MetalDevice::MetalDevice() : m_current_viewport(0, 0, 1, 1), m_current_scissor(0, 0, 1, 1) { } MetalDevice::~MetalDevice() { - // TODO: Make virtual Destroy() method instead due to order of shit.. - //DestroyStagingBuffer(); - DestroyResources(); - DestroyBuffers(); - DestroySurface(); - - if (m_device != nil) - [m_device release]; + Assert(m_layer == nil); + Assert(m_device == nil); } RenderAPI MetalDevice::GetRenderAPI() const @@ -101,108 +97,82 @@ RenderAPI MetalDevice::GetRenderAPI() const bool MetalDevice::HasSurface() const { - // TODO FIXME - //return static_cast(m_swap_chain); - return false; + return (m_layer != nil); } bool MetalDevice::GetHostRefreshRate(float* refresh_rate) { -#if 0 - if (m_swap_chain && IsFullscreen()) - { - DXGI_SWAP_CHAIN_DESC desc; - if (SUCCEEDED(m_swap_chain->GetDesc(&desc)) && desc.BufferDesc.RefreshRate.Numerator > 0 && - desc.BufferDesc.RefreshRate.Denominator > 0) - { - Log_InfoPrintf("using fs rr: %u %u", desc.BufferDesc.RefreshRate.Numerator, - desc.BufferDesc.RefreshRate.Denominator); - *refresh_rate = static_cast(desc.BufferDesc.RefreshRate.Numerator) / - static_cast(desc.BufferDesc.RefreshRate.Denominator); - return true; - } - } - return GPUDevice::GetHostRefreshRate(refresh_rate); -#else - Panic("Fixme"); - return false; -#endif } void MetalDevice::SetVSync(bool enabled) { m_vsync_enabled = enabled; + + if (m_layer != nil) + [m_layer setDisplaySyncEnabled:enabled]; } -bool MetalDevice::CreateDevice(const WindowInfo& wi, bool vsync) -{ @autoreleasepool { - // TODO: adapter selection - id device = [MTLCreateSystemDefaultDevice() autorelease]; - if (device == nil) - { - Log_ErrorPrint("Failed to create default Metal device."); - return false; - } - - id queue = [[device newCommandQueue] autorelease]; - if (queue == nil) - { - Log_ErrorPrint("Failed to create command queue."); - return false; - } - - m_device = [device retain]; - m_queue = [queue retain]; - Log_InfoPrintf("Metal Device: %s", [[m_device name] UTF8String]); - - SetFeatures(); - - m_window_info = wi; - m_vsync_enabled = vsync; - - if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) - { - m_window_info = {}; - return false; - } - - CreateCommandBuffer(); - return true; -} } - -bool MetalDevice::SetupDevice() +bool MetalDevice::CreateDevice(const std::string_view& adapter, bool debug_device) { - if (!GPUDevice::SetupDevice()) - return false; + @autoreleasepool + { + // TODO: adapter selection + id device = [MTLCreateSystemDefaultDevice() autorelease]; + if (device == nil) + { + Log_ErrorPrint("Failed to create default Metal device."); + return false; + } - if (!CreateBuffers() || !CreateResources()) - return false; + id queue = [[device newCommandQueue] autorelease]; + if (queue == nil) + { + Log_ErrorPrint("Failed to create command queue."); + return false; + } - return true; + m_device = [device retain]; + m_queue = [queue retain]; + Log_InfoPrintf("Metal Device: %s", [[m_device name] UTF8String]); + + SetFeatures(); + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) + return false; + + CreateCommandBuffer(); + + if (!CreateBuffers()) + { + Log_ErrorPrintf("Failed to create buffers."); + return false; + } + + return true; + } } void MetalDevice::SetFeatures() { - // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d - if ([m_device supportsFamily:MTLGPUFamilyMacCatalyst1] || - [m_device supportsFamily:MTLGPUFamilyMac1] || - [m_device supportsFamily:MTLGPUFamilyApple3]) - { - m_max_texture_size = 16384; - } - else - { - m_max_texture_size = 8192; - } - + // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d + if ([m_device supportsFamily:MTLGPUFamilyMacCatalyst1] || [m_device supportsFamily:MTLGPUFamilyMac1] || + [m_device supportsFamily:MTLGPUFamilyApple3]) + { + m_max_texture_size = 16384; + } + else + { + m_max_texture_size = 8192; + } + m_max_multisamples = 0; for (u32 multisamples = 1; multisamples < 16; multisamples++) - { - if (![m_device supportsTextureSampleCount:multisamples]) - break; - m_max_multisamples = multisamples; - } + { + if (![m_device supportsTextureSampleCount:multisamples]) + break; + m_max_multisamples = multisamples; + } m_features.dual_source_blend = true; m_features.per_sample_shading = true; @@ -212,82 +182,120 @@ void MetalDevice::SetFeatures() m_features.texture_buffers_emulated_with_ssbo = true; } -bool MetalDevice::MakeCurrent() +void MetalDevice::DestroyDevice() { - return true; -} + WaitForPreviousCommandBuffers(); -bool MetalDevice::DoneCurrent() -{ - return true; + if (InRenderPass()) + EndRenderPass(); + + if (m_upload_cmdbuf != nil) + { + [m_upload_encoder endEncoding]; + [m_upload_encoder release]; + m_upload_encoder = nil; + [m_upload_cmdbuf release]; + m_upload_cmdbuf = nil; + } + if (m_render_cmdbuf != nil) + { + [m_render_cmdbuf release]; + m_render_cmdbuf = nil; + } + + DestroyBuffers(); + + for (auto& it : m_cleanup_objects) + [it.second release]; + m_cleanup_objects.clear(); + + if (m_queue != nil) + { + [m_queue release]; + m_queue = nil; + } + if (m_device != nil) + { + [m_device release]; + m_device = nil; + } } bool MetalDevice::CreateLayer() -{ @autoreleasepool { - RunOnMainThread([this]() { @autoreleasepool { - Log_InfoPrintf("Creating a %ux%u Metal layer.", m_window_info.surface_width,m_window_info.surface_height); - const auto size = CGSizeMake(static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height)); - m_layer = [CAMetalLayer layer]; - [m_layer setDevice:m_device]; - [m_layer setDrawableSize:size]; - [m_layer setPixelFormat:MTLPixelFormatRGBA8Unorm]; - - NSView* view = GetWindowView(); - [view setWantsLayer:TRUE]; - [view setLayer:m_layer]; - }}); - - DebugAssert(m_layer_pass_desc == nil); - m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; - m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; - m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; - m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; - m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore; - m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0); - return true; -}} +{ + @autoreleasepool + { + RunOnMainThread([this]() { + @autoreleasepool + { + Log_InfoPrintf("Creating a %ux%u Metal layer.", m_window_info.surface_width, m_window_info.surface_height); + const auto size = + CGSizeMake(static_cast(m_window_info.surface_width), static_cast(m_window_info.surface_height)); + m_layer = [CAMetalLayer layer]; + [m_layer setDevice:m_device]; + [m_layer setDrawableSize:size]; + [m_layer setPixelFormat:MTLPixelFormatRGBA8Unorm]; + + NSView* view = GetWindowView(); + [view setWantsLayer:TRUE]; + [view setLayer:m_layer]; + } + }); + + [m_layer setDisplaySyncEnabled:m_vsync_enabled]; + + DebugAssert(m_layer_pass_desc == nil); + m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; + m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; + m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; + m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore; + m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0); + return true; + } +} void MetalDevice::DestroyLayer() { - if (m_layer == nil) - return; - - [m_layer_pass_desc release]; - m_layer_pass_desc = nil; - - RunOnMainThread([this]() { - NSView* view = GetWindowView(); - [view setLayer:nil]; - [view setWantsLayer:FALSE]; - [m_layer release]; - m_layer = nullptr; - }); + if (m_layer == nil) + return; + + // Should wait for previous command buffers to finish, which might be rendering to drawables. + WaitForPreviousCommandBuffers(); + + [m_layer_pass_desc release]; + m_layer_pass_desc = nil; + + RunOnMainThread([this]() { + NSView* view = GetWindowView(); + [view setLayer:nil]; + [view setWantsLayer:FALSE]; + [m_layer release]; + m_layer = nullptr; + }); } -bool MetalDevice::ChangeWindow(const WindowInfo& new_wi) +bool MetalDevice::UpdateWindow() { -#if 0 - DestroySurface(); + if (InRenderPass()) + EndRenderPass(); + DestroyLayer(); - m_window_info = new_wi; - return CreateSwapChain(nullptr); -#else - return false; -#endif + if (!AcquireWindow(false)) + return false; + + if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) + { + Log_ErrorPrintf("Failed to create layer on updated window"); + return false; + } + + return true; } void MetalDevice::DestroySurface() { -#if 0 - m_window_info.SetSurfaceless(); - if (IsFullscreen()) - SetFullscreen(false, 0, 0, 0.0f); - - m_swap_chain_rtv.Reset(); - m_swap_chain.Reset(); -#else - Panic("Fixme"); -#endif + DestroyLayer(); } std::string MetalDevice::GetShaderCacheBaseName(const std::string_view& type, bool debug) const @@ -295,48 +303,31 @@ std::string MetalDevice::GetShaderCacheBaseName(const std::string_view& type, bo return fmt::format("metal_{}{}", type, debug ? "_debug" : ""); } -void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height) +void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) { -#if 0 - if (!m_swap_chain) - return; + @autoreleasepool + { + m_window_info.surface_scale = new_window_scale; + if (static_cast(new_window_width) == m_window_info.surface_width && + static_cast(new_window_height) == m_window_info.surface_height) + { + return; + } - m_swap_chain_rtv.Reset(); + m_window_info.surface_width = new_window_width; + m_window_info.surface_height = new_window_height; - HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, - m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); - if (FAILED(hr)) - Log_ErrorPrintf("ResizeBuffers() failed: 0x%08X", hr); - - if (!CreateSwapChainRTV()) - Panic("Failed to recreate swap chain RTV after resize"); -#else - Panic("Fixme"); - // adjust pass desc -#endif -} - -bool MetalDevice::SupportsFullscreen() const -{ - return false; -} - -bool MetalDevice::IsFullscreen() -{ - return false; -} - -bool MetalDevice::SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) -{ - return false; + [m_layer setDrawableSize:CGSizeMake(new_window_width, new_window_height)]; + m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; + m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; + } } bool MetalDevice::CreateBuffers() { - if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || - !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || + if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) || - !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) + !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) { Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); return false; @@ -347,22 +338,29 @@ bool MetalDevice::CreateBuffers() void MetalDevice::DestroyBuffers() { - m_texture_upload_buffer.Destroy(); - m_uniform_buffer.Destroy(); + if (m_download_buffer != nil) + { + [m_download_buffer release]; + m_download_buffer = nil; + m_download_buffer_size = 0; + } + + m_texture_upload_buffer.Destroy(); + m_uniform_buffer.Destroy(); m_vertex_buffer.Destroy(); m_index_buffer.Destroy(); - - for (auto& it : m_depth_states) - { - if (it.second != nil) - [it.second release]; - } - m_depth_states.clear(); + + for (auto& it : m_depth_states) + { + if (it.second != nil) + [it.second release]; + } + m_depth_states.clear(); } GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() { - return {}; + return {}; } GPUDevice::AdapterAndModeList MetalDevice::GetAdapterAndModeList() @@ -484,7 +482,7 @@ bool MetalDevice::SetGPUTimingEnabled(bool enabled) return true; } #else - return false; + return false; #endif } @@ -495,7 +493,7 @@ float MetalDevice::GetAndResetAccumulatedGPUTime() m_accumulated_gpu_time = 0.0f; return value; #else - return 0.0f; + return 0.0f; #endif } @@ -506,333 +504,321 @@ MetalShader::MetalShader(GPUShaderStage stage, id library, id MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, const std::string_view& entry_point) -{ @autoreleasepool { - NSString* const ns_source = StringViewToNSString(source); - NSError* error = nullptr; - id library = [m_device newLibraryWithSource:ns_source options:nil error:&error]; - if (!library) - { - LogNSError(error, "Failed to compile %s shader", GPUShader::GetStageName(stage)); - - auto fp = FileSystem::OpenManagedCFile( - Path::Combine(EmuFolders::DataRoot, fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); - if (fp) - { - std::fwrite(source.data(), source.size(), 1, fp.get()); - std::fprintf(fp.get(), "\n\nCompile %s failed: %u\n", GPUShader::GetStageName(stage), static_cast(error.code)); - - const char* utf_error = [error.description UTF8String]; - std::fwrite(utf_error, std::strlen(utf_error), 1, fp.get()); - } - - return {}; - } - - id function = [library newFunctionWithName:StringViewToNSString(entry_point)]; - if (!function) - { - Log_ErrorPrintf("Failed to get main function in compiled library"); - return {}; - } - - return std::unique_ptr(new MetalShader(stage, [library retain], [function retain])); -} } +std::unique_ptr MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, + const std::string_view& entry_point) +{ + @autoreleasepool + { + NSString* const ns_source = StringViewToNSString(source); + NSError* error = nullptr; + id library = [m_device newLibraryWithSource:ns_source options:nil error:&error]; + if (!library) + { + LogNSError(error, "Failed to compile %s shader", GPUShader::GetStageName(stage)); + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile %s failed: %u\n", GPUShader::GetStageName(stage), + static_cast(error.code)); + + const char* utf_error = [error.description UTF8String]; + std::fwrite(utf_error, std::strlen(utf_error), 1, fp.get()); + } + + return {}; + } + + id function = [library newFunctionWithName:StringViewToNSString(entry_point)]; + if (!function) + { + Log_ErrorPrintf("Failed to get main function in compiled library"); + return {}; + } + + return std::unique_ptr(new MetalShader(stage, [library retain], [function retain])); + } +} std::unique_ptr MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) { - const std::string_view str_data(reinterpret_cast(data.data()), data.size()); - return CreateShaderFromMSL(stage, str_data, "main0"); + const std::string_view str_data(reinterpret_cast(data.data()), data.size()); + return CreateShaderFromMSL(stage, str_data, "main0"); } std::unique_ptr MetalDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, std::vector* out_binary /* = nullptr */) { #ifdef _DEBUG - static constexpr u32 options = SPIRVCompiler::DebugInfo | SPIRVCompiler::VulkanRules; + static constexpr u32 options = SPIRVCompiler::DebugInfo | SPIRVCompiler::VulkanRules; #else - static constexpr u32 options = SPIRVCompiler::VulkanRules; + static constexpr u32 options = SPIRVCompiler::VulkanRules; #endif - static constexpr bool dump_shaders = true; - - std::optional spirv = SPIRVCompiler::CompileShader(stage, source, options); - if (!spirv.has_value()) - { - Log_ErrorPrintf("Failed to compile shader to SPIR-V."); - return {}; - } - - std::optional msl = SPIRVCompiler::CompileSPIRVToMSL(spirv.value()); - if (!msl.has_value()) - { - Log_ErrorPrintf("Failed to compile SPIR-V to MSL."); - return {}; - } - if constexpr (dump_shaders) - { - DumpShader(s_next_bad_shader_id, "_input", source); - DumpShader(s_next_bad_shader_id, "_msl", msl.value()); - s_next_bad_shader_id++; - } - - if (out_binary) - { - out_binary->resize(msl->size()); - std::memcpy(out_binary->data(), msl->data(), msl->size()); - } - - return CreateShaderFromMSL(stage, msl.value(), "main0"); + static constexpr bool dump_shaders = false; + + std::optional spirv = SPIRVCompiler::CompileShader(stage, source, options); + if (!spirv.has_value()) + { + Log_ErrorPrintf("Failed to compile shader to SPIR-V."); + return {}; + } + + std::optional msl = SPIRVCompiler::CompileSPIRVToMSL(spirv.value()); + if (!msl.has_value()) + { + Log_ErrorPrintf("Failed to compile SPIR-V to MSL."); + return {}; + } + if constexpr (dump_shaders) + { + DumpShader(s_next_bad_shader_id, "_input", source); + DumpShader(s_next_bad_shader_id, "_msl", msl.value()); + s_next_bad_shader_id++; + } + + if (out_binary) + { + out_binary->resize(msl->size()); + std::memcpy(out_binary->data(), msl->data(), msl->size()); + } + + return CreateShaderFromMSL(stage, msl.value(), "main0"); } -MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, MTLPrimitiveType primitive) +MetalPipeline::MetalPipeline(id pipeline, id depth, MTLCullMode cull_mode, + MTLPrimitiveType primitive) : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive) { } MetalPipeline::~MetalPipeline() { - [m_pipeline release]; + MetalDevice::DeferRelease(m_pipeline); } void MetalPipeline::SetDebugName(const std::string_view& name) { - // readonly property :/ + // readonly property :/ } id MetalDevice::GetDepthState(const GPUPipeline::DepthState& ds) { - const auto it = m_depth_states.find(ds.key); - if (it != m_depth_states.end()) - return it->second; - - @autoreleasepool { - static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = - {{ - MTLCompareFunctionNever, // Never - MTLCompareFunctionAlways, // Always - MTLCompareFunctionLess, // Less - MTLCompareFunctionLessEqual, // LessEqual - MTLCompareFunctionGreater, // Greater - MTLCompareFunctionGreaterEqual, // GreaterEqual - MTLCompareFunctionEqual, // Equal - }}; - - MTLDepthStencilDescriptor* desc = [[[MTLDepthStencilDescriptor alloc] init] autorelease]; - desc.depthCompareFunction = func_mapping[static_cast(ds.depth_test.GetValue())]; - desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE; - - id state = [m_device newDepthStencilStateWithDescriptor:desc]; - m_depth_states.emplace(ds.key, state); - if (state == nil) - Log_ErrorPrintf("Failed to create depth-stencil state."); + const auto it = m_depth_states.find(ds.key); + if (it != m_depth_states.end()) + return it->second; - return state; - } + @autoreleasepool + { + static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = { + { + MTLCompareFunctionNever, // Never + MTLCompareFunctionAlways, // Always + MTLCompareFunctionLess, // Less + MTLCompareFunctionLessEqual, // LessEqual + MTLCompareFunctionGreater, // Greater + MTLCompareFunctionGreaterEqual, // GreaterEqual + MTLCompareFunctionEqual, // Equal + }}; + + MTLDepthStencilDescriptor* desc = [[[MTLDepthStencilDescriptor alloc] init] autorelease]; + desc.depthCompareFunction = func_mapping[static_cast(ds.depth_test.GetValue())]; + desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE; + + id state = [m_device newDepthStencilStateWithDescriptor:desc]; + m_depth_states.emplace(ds.key, state); + if (state == nil) + Log_ErrorPrintf("Failed to create depth-stencil state."); + + return state; + } } std::unique_ptr MetalDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) -{ @autoreleasepool { - static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitive_classes = - {{ - MTLPrimitiveTopologyClassPoint, // Points - MTLPrimitiveTopologyClassLine, // Lines - MTLPrimitiveTopologyClassTriangle, // Triangles - MTLPrimitiveTopologyClassTriangle, // TriangleStrips - }}; - static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = - {{ - MTLPrimitiveTypePoint, // Points - MTLPrimitiveTypeLine, // Lines - MTLPrimitiveTypeTriangle, // Triangles - MTLPrimitiveTypeTriangleStrip, // TriangleStrips - }}; - - static constexpr u32 MAX_COMPONENTS = 4; - static constexpr const MTLVertexFormat - format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { - {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float - {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 - {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 - {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized}, // UNorm8 - {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 - {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 - {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, MTLVertexFormatUShort4Normalized}, // UNorm16 - {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 - {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 - }; - - static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ - MTLCullModeNone, // None - MTLCullModeFront, // Front - MTLCullModeBack, // Back - }}; - - static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ - MTLBlendFactorZero, // Zero - MTLBlendFactorOne, // One - MTLBlendFactorSourceColor, // SrcColor - MTLBlendFactorOneMinusSourceColor, // InvSrcColor - MTLBlendFactorDestinationColor, // DstColor - MTLBlendFactorOneMinusDestinationColor, // InvDstColor - MTLBlendFactorSourceAlpha, // SrcAlpha - MTLBlendFactorOneMinusSourceAlpha, // InvSrcAlpha - MTLBlendFactorSource1Alpha, // SrcAlpha1 - MTLBlendFactorOneMinusSource1Alpha, // InvSrcAlpha1 - MTLBlendFactorDestinationAlpha, // DstAlpha - MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha - MTLBlendFactorBlendColor, // ConstantAlpha - MTLBlendFactorOneMinusBlendColor, // InvConstantAlpha - }}; - - static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ - MTLBlendOperationAdd, // Add - MTLBlendOperationSubtract, // Subtract - MTLBlendOperationReverseSubtract, // ReverseSubtract - MTLBlendOperationMin, // Min - MTLBlendOperationMax, // Max - }}; - - MTLRenderPipelineDescriptor* desc = [[[MTLRenderPipelineDescriptor alloc] init] autorelease]; - desc.vertexFunction = static_cast(config.vertex_shader)->GetFunction(); - desc.fragmentFunction = static_cast(config.fragment_shader)->GetFunction(); - - desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast(config.color_format)]; - desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast(config.depth_format)]; - - // Input assembly. - MTLVertexDescriptor* vdesc = nil; - if (!config.input_layout.vertex_attributes.empty()) - { - vdesc = [MTLVertexDescriptor vertexDescriptor]; - for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) - { - const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; - DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); - - MTLVertexAttributeDescriptor* vd = vdesc.attributes[i]; - vd.format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; - vd.offset = static_cast(va.offset.GetValue()); - vd.bufferIndex = 1; - } - - vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex; - vdesc.layouts[1].stepRate = 1; - vdesc.layouts[1].stride = config.input_layout.vertex_stride; - - desc.vertexDescriptor = vdesc; - } +{ + @autoreleasepool + { + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> + primitive_classes = {{ + MTLPrimitiveTopologyClassPoint, // Points + MTLPrimitiveTopologyClassLine, // Lines + MTLPrimitiveTopologyClassTriangle, // Triangles + MTLPrimitiveTopologyClassTriangle, // TriangleStrips + }}; + static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = {{ + MTLPrimitiveTypePoint, // Points + MTLPrimitiveTypeLine, // Lines + MTLPrimitiveTypeTriangle, // Triangles + MTLPrimitiveTypeTriangleStrip, // TriangleStrips + }}; - // Rasterization state. - const MTLCullMode cull_mode = cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())]; - desc.rasterizationEnabled = TRUE; - desc.inputPrimitiveTopology = primitive_classes[static_cast(config.primitive)]; - - // Depth state - id depth = GetDepthState(config.depth); - if (depth == nil) - return {}; - - // Blending state - MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0]; - ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | - (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) | - (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) | - (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone); - - // General - const MTLPrimitiveType primitive = primitives[static_cast(config.primitive)]; - desc.rasterSampleCount = config.per_sample_shading ? config.samples : 1; - - // Metal-specific stuff - desc.vertexBuffers[1].mutability = MTLMutabilityImmutable; - desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable; - - ca.blendingEnabled = config.blend.enable; - if (config.blend.enable) - { - ca.sourceRGBBlendFactor = blend_mapping[static_cast(config.blend.src_blend.GetValue())]; - ca.destinationRGBBlendFactor = blend_mapping[static_cast(config.blend.dst_blend.GetValue())]; - ca.rgbBlendOperation = op_mapping[static_cast(config.blend.blend_op.GetValue())]; - ca.sourceAlphaBlendFactor = blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())]; - ca.destinationAlphaBlendFactor = blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())]; - ca.alphaBlendOperation = op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())]; - } - - NSError* error = nullptr; - id pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error]; - if (pipeline == nil) - { - LogNSError(error, "Failed to create render pipeline state"); - return {}; - } - - return std::unique_ptr(new MetalPipeline(pipeline, depth, cull_mode, primitive)); -}} + static constexpr u32 MAX_COMPONENTS = 4; + static constexpr const MTLVertexFormat + format_mapping[static_cast(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { + {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float + {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 + {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 + {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, + MTLVertexFormatUChar4Normalized}, // UNorm8 + {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 + {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 + {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, + MTLVertexFormatUShort4Normalized}, // UNorm16 + {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 + {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 + }; + + static constexpr std::array(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ + MTLCullModeNone, // None + MTLCullModeFront, // Front + MTLCullModeBack, // Back + }}; + + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ + MTLBlendFactorZero, // Zero + MTLBlendFactorOne, // One + MTLBlendFactorSourceColor, // SrcColor + MTLBlendFactorOneMinusSourceColor, // InvSrcColor + MTLBlendFactorDestinationColor, // DstColor + MTLBlendFactorOneMinusDestinationColor, // InvDstColor + MTLBlendFactorSourceAlpha, // SrcAlpha + MTLBlendFactorOneMinusSourceAlpha, // InvSrcAlpha + MTLBlendFactorSource1Alpha, // SrcAlpha1 + MTLBlendFactorOneMinusSource1Alpha, // InvSrcAlpha1 + MTLBlendFactorDestinationAlpha, // DstAlpha + MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha + MTLBlendFactorBlendColor, // ConstantAlpha + MTLBlendFactorOneMinusBlendColor, // InvConstantAlpha + }}; + + static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ + MTLBlendOperationAdd, // Add + MTLBlendOperationSubtract, // Subtract + MTLBlendOperationReverseSubtract, // ReverseSubtract + MTLBlendOperationMin, // Min + MTLBlendOperationMax, // Max + }}; + + MTLRenderPipelineDescriptor* desc = [[[MTLRenderPipelineDescriptor alloc] init] autorelease]; + desc.vertexFunction = static_cast(config.vertex_shader)->GetFunction(); + desc.fragmentFunction = static_cast(config.fragment_shader)->GetFunction(); + + desc.colorAttachments[0].pixelFormat = s_pixel_format_mapping[static_cast(config.color_format)]; + desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast(config.depth_format)]; + + // Input assembly. + MTLVertexDescriptor* vdesc = nil; + if (!config.input_layout.vertex_attributes.empty()) + { + vdesc = [MTLVertexDescriptor vertexDescriptor]; + for (u32 i = 0; i < static_cast(config.input_layout.vertex_attributes.size()); i++) + { + const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; + DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); + + MTLVertexAttributeDescriptor* vd = vdesc.attributes[i]; + vd.format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; + vd.offset = static_cast(va.offset.GetValue()); + vd.bufferIndex = 1; + } + + vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex; + vdesc.layouts[1].stepRate = 1; + vdesc.layouts[1].stride = config.input_layout.vertex_stride; + + desc.vertexDescriptor = vdesc; + } + + // Rasterization state. + const MTLCullMode cull_mode = cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())]; + desc.rasterizationEnabled = TRUE; + desc.inputPrimitiveTopology = primitive_classes[static_cast(config.primitive)]; + + // Depth state + id depth = GetDepthState(config.depth); + if (depth == nil) + return {}; + + // Blending state + MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0]; + ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | + (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) | + (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) | + (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone); + + // General + const MTLPrimitiveType primitive = primitives[static_cast(config.primitive)]; + desc.rasterSampleCount = config.per_sample_shading ? config.samples : 1; + + // Metal-specific stuff + desc.vertexBuffers[1].mutability = MTLMutabilityImmutable; + desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable; + + ca.blendingEnabled = config.blend.enable; + if (config.blend.enable) + { + ca.sourceRGBBlendFactor = blend_mapping[static_cast(config.blend.src_blend.GetValue())]; + ca.destinationRGBBlendFactor = blend_mapping[static_cast(config.blend.dst_blend.GetValue())]; + ca.rgbBlendOperation = op_mapping[static_cast(config.blend.blend_op.GetValue())]; + ca.sourceAlphaBlendFactor = blend_mapping[static_cast(config.blend.src_alpha_blend.GetValue())]; + ca.destinationAlphaBlendFactor = blend_mapping[static_cast(config.blend.dst_alpha_blend.GetValue())]; + ca.alphaBlendOperation = op_mapping[static_cast(config.blend.alpha_blend_op.GetValue())]; + } + + NSError* error = nullptr; + id pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error]; + if (pipeline == nil) + { + LogNSError(error, "Failed to create render pipeline state"); + return {}; + } + + return std::unique_ptr(new MetalPipeline(pipeline, depth, cull_mode, primitive)); + } +} MetalTexture::MetalTexture(id texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, - Format format) + Format format) : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) { } MetalTexture::~MetalTexture() { - Destroy(); + MetalDevice::GetInstance().UnbindTexture(this); + Destroy(); } -#if 0 -void MetalTexture::CommitClear(IMetalDeviceContext* context) -{ - if (m_state == GPUTexture::State::Dirty) - return; - - // TODO: 11.1 - if (IsDepthStencil()) - { - if (m_state == GPUTexture::State::Invalidated) - ; // context->DiscardView(GetD3DDSV()); - else - context->ClearDepthStencilView(GetD3DDSV(), Metal_CLEAR_DEPTH, GetClearDepth(), 0); - } - else if (IsRenderTarget()) - { - if (m_state == GPUTexture::State::Invalidated) - ; // context->DiscardView(GetD3DRTV()); - else - context->ClearRenderTargetView(GetD3DRTV(), GetUNormClearColor().data()); - } - - m_state = GPUTexture::State::Dirty; -} -#endif - bool MetalTexture::IsValid() const { return (m_texture != nil); @@ -841,27 +827,34 @@ bool MetalTexture::IsValid() const bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; - - MetalDevice& dev = MetalDevice::GetInstance(); - MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); - if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) - { - // TODO fixme - Panic("Outta space"); - return false; - } - - const u32 offset = sb.GetCurrentOffset(); - StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); - sb.CommitMemory(req_size); - - // TODO: track this - const bool is_inline = true; - id encoder = dev.GetTextureUploadEncoder(is_inline); - [encoder copyFromBuffer:sb.GetBuffer() sourceOffset:offset sourceBytesPerRow:aligned_pitch sourceBytesPerImage:0 - sourceSize:MTLSizeMake(width, height, 1) toTexture:m_texture destinationSlice: layer destinationLevel: level destinationOrigin: MTLOriginMake(x, y, 0)]; + const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + // TODO fixme + Panic("Outta space"); + return false; + } + + const u32 offset = sb.GetCurrentOffset(); + StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); + sb.CommitMemory(req_size); + + // TODO: track this + const bool is_inline = true; + id encoder = dev.GetBlitEncoder(is_inline); + [encoder copyFromBuffer:sb.GetBuffer() + sourceOffset:offset + sourceBytesPerRow:aligned_pitch + sourceBytesPerImage:0 + sourceSize:MTLSizeMake(width, height, 1) + toTexture:m_texture + destinationSlice:layer + destinationLevel:level + destinationOrigin:MTLOriginMake(x, y, 0)]; m_state = GPUTexture::State::Dirty; return true; } @@ -869,262 +862,166 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, u32 level /*= 0*/) { - if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || - level > m_levels) - { - return false; - } - - // TODO: Commit Clear - - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; - - MetalStreamBuffer& sb = MetalDevice::GetInstance().GetTextureStreamBuffer(); - if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) - { - // TODO: handle - Panic("Outta space"); - return false; - } - - *map = sb.GetCurrentHostPointer(); - *map_stride = aligned_pitch; - m_map_x = x; - m_map_y = y; - m_map_width = width; - m_map_height = height; - m_map_layer = layer; - m_map_level = level; - m_state = GPUTexture::State::Dirty; - return true; + if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) + return false; + + const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = height * aligned_pitch; + + MetalDevice& dev = MetalDevice::GetInstance(); + dev.CommitClear(this); + + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + dev.SubmitCommandBuffer(); + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + { + Panic("Failed to allocate space in texture upload buffer"); + return false; + } + } + + *map = sb.GetCurrentHostPointer(); + *map_stride = aligned_pitch; + m_map_x = x; + m_map_y = y; + m_map_width = width; + m_map_height = height; + m_map_layer = layer; + m_map_level = level; + m_state = GPUTexture::State::Dirty; + return true; } void MetalTexture::Unmap() { - const bool discard = (m_map_width == m_width && m_map_height == m_height); - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = m_map_height * aligned_pitch; - - MetalDevice& dev = MetalDevice::GetInstance(); - MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); - const u32 offset = sb.GetCurrentOffset(); - sb.CommitMemory(req_size); - - // TODO: track this - const bool is_inline = true; - id encoder = dev.GetTextureUploadEncoder(is_inline); - [encoder copyFromBuffer:sb.GetBuffer() sourceOffset:offset sourceBytesPerRow:aligned_pitch sourceBytesPerImage:0 - sourceSize:MTLSizeMake(m_map_width, m_map_height, 1) toTexture:m_texture destinationSlice: m_map_layer - destinationLevel: m_map_level destinationOrigin: MTLOriginMake(m_map_x, m_map_y, 0)]; + const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = m_map_height * aligned_pitch; - m_map_x = 0; - m_map_y = 0; - m_map_width = 0; - m_map_height = 0; - m_map_layer = 0; - m_map_level = 0; + MetalDevice& dev = MetalDevice::GetInstance(); + MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); + const u32 offset = sb.GetCurrentOffset(); + sb.CommitMemory(req_size); + + // TODO: track this + const bool is_inline = true; + id encoder = dev.GetBlitEncoder(is_inline); + [encoder copyFromBuffer:sb.GetBuffer() + sourceOffset:offset + sourceBytesPerRow:aligned_pitch + sourceBytesPerImage:0 + sourceSize:MTLSizeMake(m_map_width, m_map_height, 1) + toTexture:m_texture + destinationSlice:m_map_layer + destinationLevel:m_map_level + destinationOrigin:MTLOriginMake(m_map_x, m_map_y, 0)]; + + m_map_x = 0; + m_map_y = 0; + m_map_width = 0; + m_map_height = 0; + m_map_layer = 0; + m_map_level = 0; +} + +void MetalTexture::MakeReadyForSampling() +{ + MetalDevice::GetInstance().UnbindFramebuffer(this); } void MetalTexture::SetDebugName(const std::string_view& name) -{ @autoreleasepool { - [m_texture setLabel:StringViewToNSString(name)]; -} } - -#if 0 -bool MetalTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, - Format format, const void* initial_data, u32 initial_data_stride) { - MetalDe - if (width > Metal_REQ_TEXTURE2D_U_OR_V_DIMENSION || height > Metal_REQ_TEXTURE2D_U_OR_V_DIMENSION || - layers > Metal_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION || (layers > 1 && samples > 1)) + @autoreleasepool { - Log_ErrorPrintf("Texture bounds (%ux%ux%u, %u mips, %u samples) are too large", width, height, layers, levels, - samples); - return false; + [m_texture setLabel:StringViewToNSString(name)]; } - - u32 bind_flags = 0; - switch (type) - { - case Type::RenderTarget: - bind_flags = Metal_BIND_RENDER_TARGET | Metal_BIND_SHADER_RESOURCE; - break; - case Type::DepthStencil: - bind_flags = Metal_BIND_DEPTH_STENCIL; // | Metal_BIND_SHADER_RESOURCE; - break; - case Type::Texture: - bind_flags = Metal_BIND_SHADER_RESOURCE; - break; - case Type::RWTexture: - bind_flags = Metal_BIND_UNORDERED_ACCESS | Metal_BIND_SHADER_RESOURCE; - break; - default: - break; - } - - CMetal_TEXTURE2D_DESC desc(GetDXGIFormat(format), width, height, layers, levels, bind_flags, - dynamic ? Metal_USAGE_DYNAMIC : Metal_USAGE_DEFAULT, dynamic ? Metal_CPU_ACCESS_WRITE : 0, - samples, 0, 0); - - Metal_SUBRESOURCE_DATA srd; - srd.pSysMem = initial_data; - srd.SysMemPitch = initial_data_stride; - srd.SysMemSlicePitch = initial_data_stride * height; - - ComPtr texture; - const HRESULT tex_hr = device->CreateTexture2D(&desc, initial_data ? &srd : nullptr, texture.GetAddressOf()); - if (FAILED(tex_hr)) - { - Log_ErrorPrintf( - "Create texture failed: 0x%08X (%ux%u levels:%u samples:%u format:%u bind_flags:%X initial_data:%p)", tex_hr, - width, height, levels, samples, static_cast(format), bind_flags, initial_data); - return false; - } - - ComPtr srv; - if (bind_flags & Metal_BIND_SHADER_RESOURCE) - { - const Metal_SRV_DIMENSION srv_dimension = - (desc.SampleDesc.Count > 1) ? - Metal_SRV_DIMENSION_TEXTURE2DMS : - (desc.ArraySize > 1 ? Metal_SRV_DIMENSION_TEXTURE2DARRAY : Metal_SRV_DIMENSION_TEXTURE2D); - const CMetal_SHADER_RESOURCE_VIEW_DESC srv_desc(srv_dimension, desc.Format, 0, desc.MipLevels, 0, desc.ArraySize); - const HRESULT hr = device->CreateShaderResourceView(texture.Get(), &srv_desc, srv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create SRV for texture failed: 0x%08X", hr); - return false; - } - } - - ComPtr rtv_dsv; - if (bind_flags & Metal_BIND_RENDER_TARGET) - { - const Metal_RTV_DIMENSION rtv_dimension = - (desc.SampleDesc.Count > 1) ? Metal_RTV_DIMENSION_TEXTURE2DMS : Metal_RTV_DIMENSION_TEXTURE2D; - const CMetal_RENDER_TARGET_VIEW_DESC rtv_desc(rtv_dimension, desc.Format, 0, 0, desc.ArraySize); - ComPtr rtv; - const HRESULT hr = device->CreateRenderTargetView(texture.Get(), &rtv_desc, rtv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create RTV for texture failed: 0x%08X", hr); - return false; - } - - rtv_dsv = std::move(rtv); - } - else if (bind_flags & Metal_BIND_DEPTH_STENCIL) - { - const Metal_DSV_DIMENSION dsv_dimension = - (desc.SampleDesc.Count > 1) ? Metal_DSV_DIMENSION_TEXTURE2DMS : Metal_DSV_DIMENSION_TEXTURE2D; - const CMetal_DEPTH_STENCIL_VIEW_DESC dsv_desc(dsv_dimension, desc.Format, 0, 0, desc.ArraySize); - ComPtr dsv; - const HRESULT hr = device->CreateDepthStencilView(texture.Get(), &dsv_desc, dsv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("Create DSV for texture failed: 0x%08X", hr); - return false; - } - - rtv_dsv = std::move(dsv); - } - - m_texture = std::move(texture); - m_srv = std::move(srv); - m_rtv_dsv = std::move(rtv_dsv); - m_width = static_cast(width); - m_height = static_cast(height); - m_layers = static_cast(layers); - m_levels = static_cast(levels); - m_samples = static_cast(samples); - m_format = format; - m_dynamic = dynamic; - return true; } -#endif void MetalTexture::Destroy() { - if (m_texture != nil) - { - [m_texture release]; - m_texture = nil; - } + if (m_texture != nil) + { + MetalDevice::DeferRelease(m_texture); + m_texture = nil; + } ClearBaseProperties(); } std::unique_ptr MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, - GPUTexture::Type type, GPUTexture::Format format, - const void* data, u32 data_stride, bool dynamic /* = false */) -{ @autoreleasepool { - if (width > m_max_texture_size || height > m_max_texture_size || samples > m_max_multisamples) - return {}; - - const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast(format)]; - if (pixel_format == MTLPixelFormatInvalid) - return {}; - - MTLTextureDescriptor* desc = [[[MTLTextureDescriptor alloc] init] autorelease]; - desc.width = width; - desc.height = height; - desc.depth = levels; - desc.pixelFormat = pixel_format; - desc.mipmapLevelCount = levels; - - switch (type) - { - case GPUTexture::Type::Texture: - desc.usage = MTLTextureUsageShaderRead; - break; - - case GPUTexture::Type::RenderTarget: - case GPUTexture::Type::DepthStencil: - desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget; - break; - - case GPUTexture::Type::RWTexture: - desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite; - break; - - default: - UnreachableCode(); - break; - } - - id tex = [m_device newTextureWithDescriptor:desc]; - if (tex == nil) - { - Log_ErrorPrintf("Failed to create %ux%u texture.", width, height); - return {}; - } - - // This one can *definitely* go on the upload buffer. - std::unique_ptr gtex(new MetalTexture([tex retain], width, height, layers, levels, samples, type, format)); - if (data) - { - // TODO: handle multi-level uploads... - gtex->Update(0, 0, width, height, data, data_stride, 0, 0); - } - - return gtex; -} } + GPUTexture::Type type, GPUTexture::Format format, + const void* data, u32 data_stride, bool dynamic /* = false */) +{ + @autoreleasepool + { + if (width > m_max_texture_size || height > m_max_texture_size || samples > m_max_multisamples) + return {}; -MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, - id rt_tex, id ds_tex, - MTLRenderPassDescriptor* descriptor) - : GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor) + const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast(format)]; + if (pixel_format == MTLPixelFormatInvalid) + return {}; + + MTLTextureDescriptor* desc = [[[MTLTextureDescriptor alloc] init] autorelease]; + desc.width = width; + desc.height = height; + desc.depth = levels; + desc.pixelFormat = pixel_format; + desc.mipmapLevelCount = levels; + + switch (type) + { + case GPUTexture::Type::Texture: + desc.usage = MTLTextureUsageShaderRead; + break; + + case GPUTexture::Type::RenderTarget: + case GPUTexture::Type::DepthStencil: + desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget; + break; + + case GPUTexture::Type::RWTexture: + desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite; + break; + + default: + UnreachableCode(); + break; + } + + id tex = [m_device newTextureWithDescriptor:desc]; + if (tex == nil) + { + Log_ErrorPrintf("Failed to create %ux%u texture.", width, height); + return {}; + } + + // This one can *definitely* go on the upload buffer. + std::unique_ptr gtex( + new MetalTexture([tex retain], width, height, layers, levels, samples, type, format)); + if (data) + { + // TODO: handle multi-level uploads... + gtex->Update(0, 0, width, height, data, data_stride, 0, 0); + } + + return gtex; + } +} + +MetalFramebuffer::MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id rt_tex, + id ds_tex, MTLRenderPassDescriptor* descriptor) + : GPUFramebuffer(rt, ds, width, height), m_rt_tex(rt_tex), m_ds_tex(ds_tex), m_descriptor(descriptor) { } MetalFramebuffer::~MetalFramebuffer() { - // TODO: safe deleting? - if (m_rt_tex != nil) - [m_rt_tex release]; - if (m_ds_tex != nil) - [m_ds_tex release]; - [m_descriptor release]; + // TODO: safe deleting? + if (m_rt_tex != nil) + [m_rt_tex release]; + if (m_ds_tex != nil) + [m_ds_tex release]; + [m_descriptor release]; } void MetalFramebuffer::SetDebugName(const std::string_view& name) @@ -1133,104 +1030,108 @@ void MetalFramebuffer::SetDebugName(const std::string_view& name) MTLRenderPassDescriptor* MetalFramebuffer::GetDescriptor() const { - if (m_rt && m_rt->GetState() != GPUTexture::State::Dirty) - { - switch (m_rt->GetState()) - { - case GPUTexture::State::Cleared: - { - const auto clear_color = m_rt->GetUNormClearColor(); - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear; - m_descriptor.colorAttachments[0].clearColor = MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); - m_rt->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Invalidated: - { - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare; - m_rt->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad; - } - break; - - default: - UnreachableCode(); - break; - } - } - - if (m_ds) - { - switch (m_ds->GetState()) - { - case GPUTexture::State::Cleared: - { - m_descriptor.depthAttachment.loadAction = MTLLoadActionClear; - m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth(); - m_ds->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Invalidated: - { - m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare; - m_ds->SetState(GPUTexture::State::Dirty); - } - break; - - case GPUTexture::State::Dirty: - { - m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad; - } - break; - - default: - UnreachableCode(); - break; - } - } - - return m_descriptor; + if (m_rt) + { + switch (m_rt->GetState()) + { + case GPUTexture::State::Cleared: + { + const auto clear_color = m_rt->GetUNormClearColor(); + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionClear; + m_descriptor.colorAttachments[0].clearColor = + MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); + m_rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare; + m_rt->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + m_descriptor.colorAttachments[0].loadAction = MTLLoadActionLoad; + } + break; + + default: + UnreachableCode(); + break; + } + } + + if (m_ds) + { + switch (m_ds->GetState()) + { + case GPUTexture::State::Cleared: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionClear; + m_descriptor.depthAttachment.clearDepth = m_ds->GetClearDepth(); + m_ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Invalidated: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionDontCare; + m_ds->SetState(GPUTexture::State::Dirty); + } + break; + + case GPUTexture::State::Dirty: + { + m_descriptor.depthAttachment.loadAction = MTLLoadActionLoad; + } + break; + + default: + UnreachableCode(); + break; + } + } + + return m_descriptor; } std::unique_ptr MetalDevice::CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, - GPUTexture* ds, u32 ds_layer, u32 ds_level) -{ @autoreleasepool { - MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; - id rt_tex = rt ? [static_cast(rt)->GetMTLTexture() retain] : nil; - id ds_tex = ds ? [static_cast(ds)->GetMTLTexture() retain] : nil; - - if (rt) - { - desc.colorAttachments[0].texture = rt_tex; - desc.colorAttachments[0].slice = rt_layer; - desc.colorAttachments[0].level = rt_level; - desc.colorAttachments[0].loadAction = MTLLoadActionLoad; - desc.colorAttachments[0].storeAction = MTLStoreActionStore; - } - - if (rt) - { - desc.depthAttachment.texture = ds_tex; - desc.depthAttachment.slice = ds_layer; - desc.depthAttachment.level = ds_level; - desc.depthAttachment.loadAction = MTLLoadActionLoad; - desc.depthAttachment.storeAction = MTLStoreActionStore; - } - - const u32 width = rt ? rt->GetMipWidth(rt_level) : ds->GetMipWidth(ds_level); - const u32 height = rt ? rt->GetMipHeight(rt_level) : ds->GetMipHeight(ds_level); - desc.renderTargetWidth = width; - desc.renderTargetHeight = height; - - return std::unique_ptr(new MetalFramebuffer(rt, ds, width, height, rt_tex, ds_tex, desc)); -} } + GPUTexture* ds, u32 ds_layer, u32 ds_level) +{ + @autoreleasepool + { + MTLRenderPassDescriptor* desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + id rt_tex = rt ? [static_cast(rt)->GetMTLTexture() retain] : nil; + id ds_tex = ds ? [static_cast(ds)->GetMTLTexture() retain] : nil; + + if (rt) + { + desc.colorAttachments[0].texture = rt_tex; + desc.colorAttachments[0].slice = rt_layer; + desc.colorAttachments[0].level = rt_level; + desc.colorAttachments[0].loadAction = MTLLoadActionLoad; + desc.colorAttachments[0].storeAction = MTLStoreActionStore; + } + + if (rt) + { + desc.depthAttachment.texture = ds_tex; + desc.depthAttachment.slice = ds_layer; + desc.depthAttachment.level = ds_level; + desc.depthAttachment.loadAction = MTLLoadActionLoad; + desc.depthAttachment.storeAction = MTLStoreActionStore; + } + + const u32 width = rt ? rt->GetMipWidth(rt_level) : ds->GetMipWidth(ds_level); + const u32 height = rt ? rt->GetMipHeight(rt_level) : ds->GetMipHeight(ds_level); + desc.renderTargetWidth = width; + desc.renderTargetHeight = height; + + return std::unique_ptr(new MetalFramebuffer(rt, ds, width, height, rt_tex, ds_tex, desc)); + } +} MetalSampler::MetalSampler(id ss) : m_ss(ss) { @@ -1240,193 +1141,210 @@ MetalSampler::~MetalSampler() = default; void MetalSampler::SetDebugName(const std::string_view& name) { - // lame.. have to put it on the descriptor :/ + // lame.. have to put it on the descriptor :/ } std::unique_ptr MetalDevice::CreateSampler(const GPUSampler::Config& config) -{ @autoreleasepool { - static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ - MTLSamplerAddressModeRepeat, // Repeat - MTLSamplerAddressModeClampToEdge, // ClampToEdge - MTLSamplerAddressModeClampToBorderColor, // ClampToBorder - }}; - static constexpr std::array(GPUSampler::Filter::MaxCount)> min_mag_filters = {{ - MTLSamplerMinMagFilterNearest, // Nearest - MTLSamplerMinMagFilterLinear, // Linear - }}; - static constexpr std::array(GPUSampler::Filter::MaxCount)> mip_filters = {{ - MTLSamplerMipFilterNearest, // Nearest - MTLSamplerMipFilterLinear, // Linear - }}; - - struct BorderColorMapping - { - u32 color; - MTLSamplerBorderColor mtl_color; - }; - static constexpr BorderColorMapping border_color_mapping[] = { - {0x00000000u, MTLSamplerBorderColorTransparentBlack}, - {0xFF000000u, MTLSamplerBorderColorOpaqueBlack}, - {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite}, - }; - - MTLSamplerDescriptor* desc = [[[MTLSamplerDescriptor alloc] init] autorelease]; - desc.normalizedCoordinates = true; - desc.sAddressMode = ta[static_cast(config.address_u.GetValue())]; - desc.tAddressMode = ta[static_cast(config.address_v.GetValue())]; - desc.rAddressMode = ta[static_cast(config.address_w.GetValue())]; - desc.minFilter = min_mag_filters[static_cast(config.min_filter.GetValue())]; - desc.magFilter = min_mag_filters[static_cast(config.mag_filter.GetValue())]; - desc.mipFilter = (config.min_lod != config.max_lod) ? mip_filters[static_cast(config.mip_filter.GetValue())] : MTLSamplerMipFilterNotMipmapped; - desc.lodMinClamp = static_cast(config.min_lod); - desc.lodMaxClamp = static_cast(config.max_lod); - desc.maxAnisotropy = config.anisotropy; - - if (config.address_u == GPUSampler::AddressMode::ClampToBorder || - config.address_v == GPUSampler::AddressMode::ClampToBorder || - config.address_w == GPUSampler::AddressMode::ClampToBorder) - { - u32 i; - for (i = 0; i < static_cast(std::size(border_color_mapping)); i++) - { - if (border_color_mapping[i].color == config.border_color) - break; - } - if (i == std::size(border_color_mapping)) - { - Log_ErrorPrintf("Unsupported border color: %08X", config.border_color.GetValue()); - return {}; - } - - desc.borderColor = border_color_mapping[i].mtl_color; - } - - // TODO: Pool? - id ss = [m_device newSamplerStateWithDescriptor:desc]; - if (ss == nil) - { - Log_ErrorPrintf("Failed to create sampler state."); - return {}; - } - - return std::unique_ptr(new MetalSampler([ss retain])); -} } +{ + @autoreleasepool + { + static constexpr std::array(GPUSampler::AddressMode::MaxCount)> ta = {{ + MTLSamplerAddressModeRepeat, // Repeat + MTLSamplerAddressModeClampToEdge, // ClampToEdge + MTLSamplerAddressModeClampToBorderColor, // ClampToBorder + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> min_mag_filters = + {{ + MTLSamplerMinMagFilterNearest, // Nearest + MTLSamplerMinMagFilterLinear, // Linear + }}; + static constexpr std::array(GPUSampler::Filter::MaxCount)> mip_filters = {{ + MTLSamplerMipFilterNearest, // Nearest + MTLSamplerMipFilterLinear, // Linear + }}; + struct BorderColorMapping + { + u32 color; + MTLSamplerBorderColor mtl_color; + }; + static constexpr BorderColorMapping border_color_mapping[] = { + {0x00000000u, MTLSamplerBorderColorTransparentBlack}, + {0xFF000000u, MTLSamplerBorderColorOpaqueBlack}, + {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite}, + }; + + MTLSamplerDescriptor* desc = [[[MTLSamplerDescriptor alloc] init] autorelease]; + desc.normalizedCoordinates = true; + desc.sAddressMode = ta[static_cast(config.address_u.GetValue())]; + desc.tAddressMode = ta[static_cast(config.address_v.GetValue())]; + desc.rAddressMode = ta[static_cast(config.address_w.GetValue())]; + desc.minFilter = min_mag_filters[static_cast(config.min_filter.GetValue())]; + desc.magFilter = min_mag_filters[static_cast(config.mag_filter.GetValue())]; + desc.mipFilter = (config.min_lod != config.max_lod) ? mip_filters[static_cast(config.mip_filter.GetValue())] : + MTLSamplerMipFilterNotMipmapped; + desc.lodMinClamp = static_cast(config.min_lod); + desc.lodMaxClamp = static_cast(config.max_lod); + desc.maxAnisotropy = config.anisotropy; + + if (config.address_u == GPUSampler::AddressMode::ClampToBorder || + config.address_v == GPUSampler::AddressMode::ClampToBorder || + config.address_w == GPUSampler::AddressMode::ClampToBorder) + { + u32 i; + for (i = 0; i < static_cast(std::size(border_color_mapping)); i++) + { + if (border_color_mapping[i].color == config.border_color) + break; + } + if (i == std::size(border_color_mapping)) + { + Log_ErrorPrintf("Unsupported border color: %08X", config.border_color.GetValue()); + return {}; + } + + desc.borderColor = border_color_mapping[i].mtl_color; + } + + // TODO: Pool? + id ss = [m_device newSamplerStateWithDescriptor:desc]; + if (ss == nil) + { + Log_ErrorPrintf("Failed to create sampler state."); + return {}; + } + + return std::unique_ptr(new MetalSampler([ss retain])); + } +} bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data, - u32 out_data_stride) + u32 out_data_stride) { -#if 0 - const MetalTexture* tex = static_cast(texture); - if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat())) - return false; + constexpr u32 src_layer = 0; + constexpr u32 src_level = 0; - const CMetal_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), - static_cast(y + height), 1); - m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box); + const u32 copy_size = width * texture->GetPixelSize(); + const u32 pitch = Common::AlignUpPow2(copy_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 required_size = pitch * height; + if (!CheckDownloadBufferSize(required_size)) + return false; - Metal_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, Metal_MAP_READ, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Map() failed with HRESULT %08X", hr); - return false; - } + MetalTexture* T = static_cast(texture); + CommitClear(T); - const u32 copy_size = tex->GetPixelSize() * width; - StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height); - m_context->Unmap(m_readback_staging_texture.Get(), 0); - return true; -#else - return false; -#endif + @autoreleasepool + { + id encoder = GetBlitEncoder(true); + + [encoder copyFromTexture:T->GetMTLTexture() + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(x, y, 0) + sourceSize:MTLSizeMake(width, height, 1) + toBuffer:m_download_buffer + destinationOffset:0 + destinationBytesPerRow:pitch + destinationBytesPerImage:0]; + + SubmitCommandBuffer(true); + + StringUtil::StrideMemCpy(out_data, out_data_stride, [m_download_buffer contents], pitch, copy_size, height); + } + + return true; } -#if 0 -bool MetalDevice::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) +bool MetalDevice::CheckDownloadBufferSize(u32 required_size) { - if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && - m_readback_staging_texture_format == format) - return true; + if (m_download_buffer_size >= required_size) + return true; - DestroyStagingBuffer(); + @autoreleasepool + { + // We don't need to defer releasing this one, it's not going to be used. + if (m_download_buffer != nil) + [m_download_buffer release]; - CMetal_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, Metal_USAGE_STAGING, Metal_CPU_ACCESS_READ); - HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, m_readback_staging_texture.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr); - return false; - } + constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceOptionCPUCacheModeDefault; + m_download_buffer = [[m_device newBufferWithLength:required_size options:options] retain]; + if (m_download_buffer == nil) + { + Log_ErrorPrintf("Failed to create %u byte download buffer", required_size); + m_download_buffer_size = 0; + return false; + } - return true; + m_download_buffer_size = required_size; + } + + return true; } -void MetalDevice::DestroyStagingBuffer() -{ - m_readback_staging_texture.Reset(); - m_readback_staging_texture_width = 0; - m_readback_staging_texture_height = 0; - m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; -} -#endif - bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const { - return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid); + return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid); } void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, - GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, - u32 height) + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) { - DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); - DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); - DebugAssert((src_y + height) <= src->GetMipWidth(src_level)); - DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); - DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); - DebugAssert((dst_y + height) <= dst->GetMipWidth(dst_level)); + DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); + DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); + DebugAssert((src_y + height) <= src->GetMipWidth(src_level)); + DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); + DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); + DebugAssert((dst_y + height) <= dst->GetMipWidth(dst_level)); -#if 0 - MetalTexture* dst11 = static_cast(dst); - MetalTexture* src11 = static_cast(src); + MetalTexture* D = static_cast(dst); + MetalTexture* S = static_cast(src); - if (dst11->IsRenderTargetOrDepthStencil()) - { - if (src11->GetState() == GPUTexture::State::Cleared) - { - if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight()) - { - // pass clear through - dst11->m_state = src11->m_state; - dst11->m_clear_value = src11->m_clear_value; - return; - } - } - else if (dst_x == 0 && dst_y == 0 && width == dst11->GetMipWidth(dst_level) && - height == dst11->GetMipHeight(dst_level)) - { - // TODO: 11.1 discard - dst11->SetState(GPUTexture::State::Dirty); - } + if (D->IsRenderTargetOrDepthStencil()) + { + if (S->GetState() == GPUTexture::State::Cleared) + { + if (S->GetWidth() == D->GetWidth() && S->GetHeight() == D->GetHeight()) + { + // pass clear through + D->m_state = S->m_state; + D->m_clear_value = S->m_clear_value; + return; + } + } + else if (S->GetState() == GPUTexture::State::Invalidated) + { + // Contents are undefined ;) + return; + } + else if (dst_x == 0 && dst_y == 0 && width == D->GetMipWidth(dst_level) && height == D->GetMipHeight(dst_level)) + { + D->SetState(GPUTexture::State::Dirty); + } - dst11->CommitClear(m_context.Get()); - } + CommitClear(D); + } - src11->CommitClear(m_context.Get()); + CommitClear(S); - const CMetal_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), - static_cast(src_y + height), 1); - m_context->CopySubresourceRegion(dst11->GetD3DTexture(), MetalCalcSubresource(dst_level, dst_layer, dst->GetLevels()), - dst_x, dst_y, 0, src11->GetD3DTexture(), - MetalCalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); -#endif - Panic("Fixme"); + @autoreleasepool + { + id encoder = GetBlitEncoder(true); + [encoder copyFromTexture:S->GetMTLTexture() + sourceSlice:src_level + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_x, src_y, 0) + sourceSize:MTLSizeMake(width, height, 1) + toTexture:D->GetMTLTexture() + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_x, dst_y, 0)]; + } } void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, - GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, - u32 height) + GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, + u32 height) { #if 0 DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); @@ -1450,65 +1368,95 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 src11->GetD3DTexture(), MetalCalcSubresource(src_level, src_layer, src->GetLevels()), dst11->GetDXGIFormat()); #else - Panic("Fixme"); + Panic("Fixme"); #endif } -#if 0 +void MetalDevice::CommitClear(MetalTexture* tex) +{ + if (tex->GetState() == GPUTexture::State::Dirty) + return; + + DebugAssert(tex->IsRenderTargetOrDepthStencil()); + + if (tex->GetState() == GPUTexture::State::Cleared) + { + // TODO: We could combine it with the current render pass. + if (InRenderPass()) + EndRenderPass(); + + @autoreleasepool + { + // Allocating here seems a bit sad. + MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; + desc.renderTargetWidth = tex->GetWidth(); + desc.renderTargetHeight = tex->GetHeight(); + if (tex->IsRenderTarget()) + { + const auto cc = tex->GetUNormClearColor(); + desc.colorAttachments[0].texture = tex->GetMTLTexture(); + desc.colorAttachments[0].loadAction = MTLLoadActionClear; + desc.colorAttachments[0].storeAction = MTLStoreActionStore; + desc.colorAttachments[0].clearColor = MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]); + } + else + { + desc.depthAttachment.texture = tex->GetMTLTexture(); + desc.depthAttachment.loadAction = MTLLoadActionClear; + desc.depthAttachment.storeAction = MTLStoreActionStore; + desc.depthAttachment.clearDepth = tex->GetClearDepth(); + } + + id encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; + [encoder endEncoding]; + } + } +} MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) { } -MetalTextureBuffer::~MetalTextureBuffer() = default; - -bool MetalTextureBuffer::CreateBuffer(IMetalDevice* device) +MetalTextureBuffer::~MetalTextureBuffer() { - if (!m_buffer.Create(device, Metal_BIND_SHADER_RESOURCE, GetSizeInBytes())) - return false; + if (m_buffer.IsValid()) + MetalDevice::GetInstance().UnbindTextureBuffer(this); + m_buffer.Destroy(); +} - static constexpr std::array(Format::MaxCount)> dxgi_formats = {{ - DXGI_FORMAT_R16_UINT, - }}; - - CMetal_SHADER_RESOURCE_VIEW_DESC srv_desc(m_buffer.GetD3DBuffer(), dxgi_formats[static_cast(m_format)], 0, - m_size_in_elements); - const HRESULT hr = device->CreateShaderResourceView(m_buffer.GetD3DBuffer(), &srv_desc, m_srv.GetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateShaderResourceView() failed: %08X", hr); - return false; - } - - return true; +bool MetalTextureBuffer::CreateBuffer(id device) +{ + return m_buffer.Create(device, GetSizeInBytes()); } void* MetalTextureBuffer::Map(u32 required_elements) { const u32 esize = GetElementSize(m_format); - const auto res = m_buffer.Map(MetalDevice::GetD3DContext(), esize, esize * required_elements); - m_current_position = res.index_aligned; - return res.pointer; + const u32 req_size = esize * required_elements; + if (!m_buffer.ReserveMemory(req_size, esize)) + { + MetalDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of space in texture buffer"); + if (!m_buffer.ReserveMemory(req_size, esize)) + Panic("Failed to allocate texture buffer space."); + } + + m_current_position = m_buffer.GetCurrentOffset() / esize; + return m_buffer.GetCurrentHostPointer(); } void MetalTextureBuffer::Unmap(u32 used_elements) { - m_buffer.Unmap(MetalDevice::GetD3DContext(), used_elements * GetElementSize(m_format)); + m_buffer.CommitMemory(GetElementSize(m_format) * used_elements); } -#endif std::unique_ptr MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) { -#if 0 std::unique_ptr tb = std::make_unique(format, size_in_elements); - if (!tb->CreateBuffer(m_device.Get())) + if (!tb->CreateBuffer(m_device)) tb.reset(); return tb; -#else - return {}; -#endif } void MetalDevice::PushDebugGroup(const char* fmt, ...) @@ -1526,119 +1474,132 @@ void MetalDevice::InsertDebugMessage(const char* fmt, ...) void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex) { - const u32 req_size = vertex_size * vertex_count; - if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) - { - SubmitCommandBufferAndRestartRenderPass("out of vertex space"); - if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) - Panic("Failed to allocate vertex space"); - } - - *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); - *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; - *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; + const u32 req_size = vertex_size * vertex_count; + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + { + SubmitCommandBufferAndRestartRenderPass("out of vertex space"); + if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) + Panic("Failed to allocate vertex space"); + } + + *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); + *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; + *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; } void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { - m_vertex_buffer.CommitMemory(vertex_size * vertex_count); + m_vertex_buffer.CommitMemory(vertex_size * vertex_count); } void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) { - const u32 req_size = sizeof(DrawIndex) * index_count; - if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) - { - SubmitCommandBufferAndRestartRenderPass("out of index space"); - if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) - Panic("Failed to allocate index space"); - } - - *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); - *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); - *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); + const u32 req_size = sizeof(DrawIndex) * index_count; + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + { + SubmitCommandBufferAndRestartRenderPass("out of index space"); + if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) + Panic("Failed to allocate index space"); + } + + *map_ptr = reinterpret_cast(m_index_buffer.GetCurrentHostPointer()); + *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); + *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); } void MetalDevice::UnmapIndexBuffer(u32 used_index_count) { - m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); + m_index_buffer.CommitMemory(sizeof(DrawIndex) * used_index_count); } void MetalDevice::PushUniformBuffer(const void* data, u32 data_size) { - void* map = MapUniformBuffer(data_size); - std::memcpy(map, data, data_size); - UnmapUniformBuffer(data_size); + void* map = MapUniformBuffer(data_size); + std::memcpy(map, data, data_size); + UnmapUniformBuffer(data_size); } void* MetalDevice::MapUniformBuffer(u32 size) { const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); - if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) - { - SubmitCommandBufferAndRestartRenderPass("out of uniform space"); - if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) - Panic("Failed to allocate uniform space."); - } - - return m_uniform_buffer.GetCurrentHostPointer(); + if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) + { + SubmitCommandBufferAndRestartRenderPass("out of uniform space"); + if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) + Panic("Failed to allocate uniform space."); + } + + return m_uniform_buffer.GetCurrentHostPointer(); } void MetalDevice::UnmapUniformBuffer(u32 size) { - m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); - m_uniform_buffer.CommitMemory(size); - if (InRenderPass()) - SetUniformBufferInRenderEncoder(); + m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); + m_uniform_buffer.CommitMemory(size); + if (InRenderPass()) + SetUniformBufferInRenderEncoder(); } void MetalDevice::SetFramebuffer(GPUFramebuffer* fb) { -#if 0 if (m_current_framebuffer == fb) return; - EndRenderPass(); + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = static_cast(fb); -#else - Panic("Fixme"); -#endif + + // Current pipeline might be incompatible, so unbind it. + // Otherwise it'll get bound to the new render encoder. + m_current_pipeline = nullptr; + m_current_depth_state = nil; } void MetalDevice::UnbindFramebuffer(MetalFramebuffer* fb) { -#if 0 if (m_current_framebuffer != fb) return; - EndRenderPass(); + if (InRenderPass()) + EndRenderPass(); + m_current_framebuffer = nullptr; +} + +void MetalDevice::UnbindFramebuffer(MetalTexture* tex) +{ + if (!m_current_framebuffer) + return; + + if (m_current_framebuffer->GetRT() != tex && m_current_framebuffer->GetDS() != tex) + return; + + if (InRenderPass()) + EndRenderPass(); m_current_framebuffer = nullptr; -#else - Panic("Fixme"); -#endif } void MetalDevice::SetPipeline(GPUPipeline* pipeline) { - if (m_current_pipeline == pipeline) - return; - - m_current_pipeline = static_cast(pipeline); - if (InRenderPass()) - { - [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; - - if (m_current_depth_state != m_current_pipeline->GetDepthState()) - { - m_current_depth_state = m_current_pipeline->GetDepthState(); - [m_render_encoder setDepthStencilState:m_current_depth_state]; - } - if (m_current_cull_mode != m_current_pipeline->GetCullMode()) - { - m_current_cull_mode = m_current_pipeline->GetCullMode(); - [m_render_encoder setCullMode:m_current_cull_mode]; - } - } + if (m_current_pipeline == pipeline) + return; + + m_current_pipeline = static_cast(pipeline); + if (InRenderPass()) + { + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + + if (m_current_depth_state != m_current_pipeline->GetDepthState()) + { + m_current_depth_state = m_current_pipeline->GetDepthState(); + [m_render_encoder setDepthStencilState:m_current_depth_state]; + } + if (m_current_cull_mode != m_current_pipeline->GetCullMode()) + { + m_current_cull_mode = m_current_pipeline->GetCullMode(); + [m_render_encoder setCullMode:m_current_cull_mode]; + } + } } void MetalDevice::UnbindPipeline(MetalPipeline* pl) @@ -1651,305 +1612,388 @@ void MetalDevice::UnbindPipeline(MetalPipeline* pl) void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) { - DebugAssert(slot < MAX_TEXTURE_SAMPLERS); + DebugAssert(slot < MAX_TEXTURE_SAMPLERS); - id T = texture ? static_cast(texture)->GetMTLTexture() : nil; - id S = sampler ? static_cast(sampler)->GetSamplerState() : nil; + id T = texture ? static_cast(texture)->GetMTLTexture() : nil; + id S = sampler ? static_cast(sampler)->GetSamplerState() : nil; - if (m_current_textures[slot] != T) - { - m_current_textures[slot] = T; - if (InRenderPass()) - [m_render_encoder setFragmentTexture:T atIndex:slot]; - } - - if (m_current_samplers[slot] != S) - { - m_current_samplers[slot] = S; - if (InRenderPass()) - [m_render_encoder setFragmentSamplerState:S atIndex:slot]; - } + if (m_current_textures[slot] != T) + { + m_current_textures[slot] = T; + if (InRenderPass()) + [m_render_encoder setFragmentTexture:T atIndex:slot]; + } + + if (m_current_samplers[slot] != S) + { + m_current_samplers[slot] = S; + if (InRenderPass()) + [m_render_encoder setFragmentSamplerState:S atIndex:slot]; + } } void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) { -#if 0 - MetalTextureBuffer* B = static_cast(buffer); - m_context->PSSetShaderResources(0, 1, B->GetSRVArray()); -#else - Panic("Fixme"); -#endif + id B = buffer ? static_cast(buffer)->GetMTLBuffer() : nil; + if (m_current_ssbo == B) + return; + + m_current_ssbo = B; + if (InRenderPass()) + [m_render_encoder setFragmentBuffer:B offset:0 atIndex:1]; } void MetalDevice::UnbindTexture(MetalTexture* tex) { - const id T = tex->GetMTLTexture(); - for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) - { - if (m_current_textures[i] == T) - { - m_current_textures[i] = nil; - if (InRenderPass()) - [m_render_encoder setFragmentTexture:nil atIndex:i]; - } - } + const id T = tex->GetMTLTexture(); + for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) + { + if (m_current_textures[i] == T) + { + m_current_textures[i] = nil; + if (InRenderPass()) + [m_render_encoder setFragmentTexture:nil atIndex:i]; + } + } +} + +void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf) +{ + if (m_current_ssbo != buf->GetMTLBuffer()) + return; + + m_current_ssbo = nil; + if (InRenderPass()) + [m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1]; } void MetalDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) { - const Common::Rectangle new_vp = Common::Rectangle::FromExtents(x, y, width, height); - if (new_vp == m_current_viewport) - return; - - m_current_viewport = new_vp; - if (InRenderPass()) - SetViewportInRenderEncoder(); + const Common::Rectangle new_vp = Common::Rectangle::FromExtents(x, y, width, height); + if (new_vp == m_current_viewport) + return; + + m_current_viewport = new_vp; + if (InRenderPass()) + SetViewportInRenderEncoder(); } void MetalDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) { - const Common::Rectangle new_sr = Common::Rectangle::FromExtents(x, y, width, height); - if (new_sr == m_current_scissor) - return; - - m_current_scissor = new_sr; - if (InRenderPass()) - SetScissorInRenderEncoder(); + const Common::Rectangle new_sr = Common::Rectangle::FromExtents(x, y, width, height); + if (new_sr == m_current_scissor) + return; + + m_current_scissor = new_sr; + if (InRenderPass()) + SetScissorInRenderEncoder(); } void MetalDevice::BeginRenderPass() { - DebugAssert(m_render_encoder == nil); - - // Inline writes :( - if (m_inline_upload_encoder != nil) - { - [m_inline_upload_encoder endEncoding]; - [m_inline_upload_encoder release]; - } - - MTLRenderPassDescriptor* desc; - if (!m_current_framebuffer) - { - // Rendering to view, but we got interrupted... - desc = [MTLRenderPassDescriptor renderPassDescriptor]; - desc.colorAttachments[0].texture = [m_layer_drawable texture]; - desc.colorAttachments[0].loadAction = MTLLoadActionLoad; - } - else - { - desc = m_current_framebuffer->GetDescriptor(); - } - - m_render_encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; + DebugAssert(m_render_encoder == nil); + + // Inline writes :( + if (m_inline_upload_encoder != nil) + { + [m_inline_upload_encoder endEncoding]; + [m_inline_upload_encoder release]; + m_inline_upload_encoder = nil; + } + + MTLRenderPassDescriptor* desc; + if (!m_current_framebuffer) + { + // Rendering to view, but we got interrupted... + desc = [MTLRenderPassDescriptor renderPassDescriptor]; + desc.colorAttachments[0].texture = [m_layer_drawable texture]; + desc.colorAttachments[0].loadAction = MTLLoadActionLoad; + } + else + { + desc = m_current_framebuffer->GetDescriptor(); + } + + m_render_encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; + SetInitialEncoderState(); } void MetalDevice::EndRenderPass() { - DebugAssert(InRenderPass() && !IsInlineUploading()); - [m_render_encoder endEncoding]; - [m_render_encoder release]; - m_render_encoder = nil; + DebugAssert(InRenderPass() && !IsInlineUploading()); + [m_render_encoder endEncoding]; + [m_render_encoder release]; + m_render_encoder = nil; } void MetalDevice::EndInlineUploading() { - DebugAssert(IsInlineUploading() && !InRenderPass()); - [m_inline_upload_encoder endEncoding]; - [m_inline_upload_encoder release]; - m_inline_upload_encoder = nil; + DebugAssert(IsInlineUploading() && !InRenderPass()); + [m_inline_upload_encoder endEncoding]; + [m_inline_upload_encoder release]; + m_inline_upload_encoder = nil; } void MetalDevice::EndAnyEncoding() { - if (InRenderPass()) - EndRenderPass(); - else if (IsInlineUploading()) - EndInlineUploading(); + if (InRenderPass()) + EndRenderPass(); + else if (IsInlineUploading()) + EndInlineUploading(); } void MetalDevice::SetInitialEncoderState() { - // Set initial state. - // TODO: avoid uniform set here? it's probably going to get changed... - SetUniformBufferInRenderEncoder(); - [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1]; - [m_render_encoder setCullMode:m_current_cull_mode]; - if (m_current_depth_state != nil) - [m_render_encoder setDepthStencilState:m_current_depth_state]; - if (m_current_pipeline != nil) - [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; - [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; - [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; - SetViewportInRenderEncoder(); - SetScissorInRenderEncoder(); + // Set initial state. + // TODO: avoid uniform set here? it's probably going to get changed... + // Might be better off just deferring all the init until the first draw... + SetUniformBufferInRenderEncoder(); + [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1]; + [m_render_encoder setCullMode:m_current_cull_mode]; + if (m_current_depth_state != nil) + [m_render_encoder setDepthStencilState:m_current_depth_state]; + if (m_current_pipeline != nil) + [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; + [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; + if (m_current_ssbo) + [m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1]; + SetViewportInRenderEncoder(); + SetScissorInRenderEncoder(); } void MetalDevice::SetUniformBufferInRenderEncoder() { - [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; - [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; + [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; + [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; } void MetalDevice::SetViewportInRenderEncoder() { - [m_render_encoder setViewport:(MTLViewport){ - static_cast(m_current_viewport.left), static_cast(m_current_viewport.top), - static_cast(m_current_viewport.GetWidth()), static_cast(m_current_viewport.GetHeight()), - 0.0, 1.0 }]; + const Common::Rectangle rc = ClampToFramebufferSize(m_current_viewport); + [m_render_encoder + setViewport:(MTLViewport){static_cast(rc.left), static_cast(rc.top), + static_cast(rc.GetWidth()), static_cast(rc.GetHeight()), 0.0, 1.0}]; } void MetalDevice::SetScissorInRenderEncoder() { - [m_render_encoder setScissorRect:(MTLScissorRect){ - static_cast(m_current_scissor.left), static_cast(m_current_scissor.top), - static_cast(m_current_scissor.GetWidth()), static_cast(m_current_scissor.GetHeight()) - }]; + const Common::Rectangle rc = ClampToFramebufferSize(m_current_scissor); + [m_render_encoder + setScissorRect:(MTLScissorRect){static_cast(rc.left), static_cast(rc.top), + static_cast(rc.GetWidth()), static_cast(rc.GetHeight())}]; +} + +Common::Rectangle MetalDevice::ClampToFramebufferSize(const Common::Rectangle& rc) const +{ + const s32 clamp_width = m_current_framebuffer ? m_current_framebuffer->GetWidth() : m_window_info.surface_width; + const s32 clamp_height = m_current_framebuffer ? m_current_framebuffer->GetHeight() : m_window_info.surface_height; + return rc.ClampedSize(clamp_width, clamp_height); } void MetalDevice::PreDrawCheck() { - if (!InRenderPass()) - BeginRenderPass(); + if (!InRenderPass()) + BeginRenderPass(); } void MetalDevice::Draw(u32 vertex_count, u32 base_vertex) { PreDrawCheck(); - [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count]; + [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count]; } void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) { PreDrawCheck(); - - const u32 index_offset = base_index * sizeof(u16); - [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() indexCount:index_count indexType:MTLIndexTypeUInt16 indexBuffer:m_index_buffer.GetBuffer() indexBufferOffset:index_offset instanceCount:1 baseVertex:base_vertex baseInstance:0]; + + const u32 index_offset = base_index * sizeof(u16); + [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() + indexCount:index_count + indexType:MTLIndexTypeUInt16 + indexBuffer:m_index_buffer.GetBuffer() + indexBufferOffset:index_offset + instanceCount:1 + baseVertex:base_vertex + baseInstance:0]; } -id MetalDevice::GetTextureUploadEncoder(bool is_inline) -{ @autoreleasepool { - if (!is_inline) - { - if (!m_upload_cmdbuf) - { - m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; - m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain]; - [m_upload_encoder setLabel:@"Upload Encoder"]; - } - return m_upload_encoder; - } - - // Interleaved with draws. - if (m_inline_upload_encoder != nil) - return m_inline_upload_encoder; - - if (InRenderPass()) - EndRenderPass(); - m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain]; - return m_inline_upload_encoder; -} } +id MetalDevice::GetBlitEncoder(bool is_inline) +{ + @autoreleasepool + { + if (!is_inline) + { + if (!m_upload_cmdbuf) + { + m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; + m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain]; + [m_upload_encoder setLabel:@"Upload Encoder"]; + } + return m_upload_encoder; + } + + // Interleaved with draws. + if (m_inline_upload_encoder != nil) + return m_inline_upload_encoder; + + if (InRenderPass()) + EndRenderPass(); + m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain]; + return m_inline_upload_encoder; + } +} bool MetalDevice::BeginPresent(bool skip_present) -{ @autoreleasepool { - if (skip_present || m_layer == nil) - return false; +{ + @autoreleasepool + { + if (skip_present || m_layer == nil) + return false; - EndAnyEncoding(); - - m_layer_drawable = [[m_layer nextDrawable] retain]; - if (m_layer_drawable == nil) - return false; - - // Set up rendering to layer. - id layer_texture = [m_layer_drawable texture]; - m_current_framebuffer = nullptr; - m_layer_pass_desc.colorAttachments[0].texture = layer_texture; - m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; - m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; - SetInitialEncoderState(); - return true; -} } + EndAnyEncoding(); + + m_layer_drawable = [[m_layer nextDrawable] retain]; + if (m_layer_drawable == nil) + return false; + + SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); + + // Set up rendering to layer. + id layer_texture = [m_layer_drawable texture]; + m_current_framebuffer = nullptr; + m_layer_pass_desc.colorAttachments[0].texture = layer_texture; + m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; + m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; + m_current_pipeline = nullptr; + m_current_depth_state = nil; + SetInitialEncoderState(); + return true; + } +} void MetalDevice::EndPresent() { - DebugAssert(!m_current_framebuffer); - EndAnyEncoding(); - - [m_render_cmdbuf presentDrawable:m_layer_drawable]; - [m_layer_drawable release]; - SubmitCommandBuffer(); + DebugAssert(!m_current_framebuffer); + EndAnyEncoding(); + + [m_render_cmdbuf presentDrawable:m_layer_drawable]; + [m_layer_drawable release]; + SubmitCommandBuffer(); } void MetalDevice::CreateCommandBuffer() -{ @autoreleasepool { - DebugAssert(m_render_cmdbuf == nil); - const u64 fence_counter = ++m_current_fence_counter; - m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; - [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id) { - CommandBufferCompleted(fence_counter); - }]; -} } - -void MetalDevice::CommandBufferCompleted(u64 fence_counter) { - std::unique_lock lock(m_fence_mutex); - m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), std::memory_order_release); + @autoreleasepool + { + DebugAssert(m_render_cmdbuf == nil); + const u64 fence_counter = ++m_current_fence_counter; + m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; + [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id) { + CommandBufferCompletedOffThread(fence_counter); + }]; + } + + CleanupObjects(); } -void MetalDevice::SubmitCommandBuffer() +void MetalDevice::CommandBufferCompletedOffThread(u64 fence_counter) { - if (m_upload_cmdbuf != nil) - { - [m_upload_encoder endEncoding]; - [m_upload_encoder release]; - m_upload_encoder = nil; - [m_upload_cmdbuf commit]; - [m_upload_cmdbuf release]; - m_upload_cmdbuf = nil; - } - - if (m_render_cmdbuf != nil) - { - if (InRenderPass()) - EndRenderPass(); - else if (IsInlineUploading()) - EndInlineUploading(); + std::unique_lock lock(m_fence_mutex); + m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), + std::memory_order_release); +} - [m_render_cmdbuf commit]; - [m_render_cmdbuf release]; - m_render_cmdbuf = nil; - } - - CreateCommandBuffer(); +void MetalDevice::SubmitCommandBuffer(bool wait_for_completion) +{ + if (m_upload_cmdbuf != nil) + { + [m_upload_encoder endEncoding]; + [m_upload_encoder release]; + m_upload_encoder = nil; + [m_upload_cmdbuf commit]; + [m_upload_cmdbuf release]; + m_upload_cmdbuf = nil; + } + + if (m_render_cmdbuf != nil) + { + if (InRenderPass()) + EndRenderPass(); + else if (IsInlineUploading()) + EndInlineUploading(); + + [m_render_cmdbuf commit]; + + if (wait_for_completion) + [m_render_cmdbuf waitUntilCompleted]; + + [m_render_cmdbuf release]; + m_render_cmdbuf = nil; + } + + CreateCommandBuffer(); } void MetalDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason) { - Log_DevPrintf("Submitting command buffer and restarting render pass due to %s", reason); + Log_DevPrintf("Submitting command buffer and restarting render pass due to %s", reason); - const bool in_render_pass = InRenderPass(); - SubmitCommandBuffer(); - if (in_render_pass) - BeginRenderPass(); + const bool in_render_pass = InRenderPass(); + SubmitCommandBuffer(); + if (in_render_pass) + BeginRenderPass(); } void MetalDevice::WaitForFenceCounter(u64 counter) { - if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter) - return; - - // TODO: There has to be a better way to do this.. - std::unique_lock lock(m_fence_mutex); - while (m_completed_fence_counter.load(std::memory_order_acquire) < counter) - { - lock.unlock(); - pthread_yield_np(); - lock.lock(); - } + if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter) + return; + + // TODO: There has to be a better way to do this.. + std::unique_lock lock(m_fence_mutex); + while (m_completed_fence_counter.load(std::memory_order_acquire) < counter) + { + lock.unlock(); + pthread_yield_np(); + lock.lock(); + } + + CleanupObjects(); +} + +void MetalDevice::WaitForPreviousCommandBuffers() +{ + // Early init? + if (m_current_fence_counter == 0) + return; + + WaitForFenceCounter(m_current_fence_counter - 1); +} + +void MetalDevice::CleanupObjects() +{ + const u64 counter = m_completed_fence_counter.load(std::memory_order_acquire); + while (m_cleanup_objects.size() > 0 && m_cleanup_objects.front().first <= counter) + { + [m_cleanup_objects.front().second release]; + m_cleanup_objects.pop_front(); + } +} + +void MetalDevice::DeferRelease(id obj) +{ + MetalDevice& dev = GetInstance(); + dev.m_cleanup_objects.emplace_back(dev.m_current_fence_counter, obj); +} + +void MetalDevice::DeferRelease(u64 fence_counter, id obj) +{ + MetalDevice& dev = GetInstance(); + dev.m_cleanup_objects.emplace_back(fence_counter, obj); } std::unique_ptr WrapNewMetalDevice() { - return std::unique_ptr(new MetalDevice()); + return std::unique_ptr(new MetalDevice()); } diff --git a/src/core/gpu/metal_stream_buffer.h b/src/core/gpu/metal_stream_buffer.h index d3a71223a..65ccfa54f 100644 --- a/src/core/gpu/metal_stream_buffer.h +++ b/src/core/gpu/metal_stream_buffer.h @@ -22,44 +22,44 @@ class MetalStreamBuffer { public: - MetalStreamBuffer(); - MetalStreamBuffer(MetalStreamBuffer&& move) = delete; - MetalStreamBuffer(const MetalStreamBuffer&) = delete; - ~MetalStreamBuffer(); + MetalStreamBuffer(); + MetalStreamBuffer(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer(const MetalStreamBuffer&) = delete; + ~MetalStreamBuffer(); - MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete; - MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete; + MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete; + MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete; - ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); } - ALWAYS_INLINE id GetBuffer() const { return m_buffer; } - ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } - ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } - ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } - ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } - ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } + ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); } + ALWAYS_INLINE id GetBuffer() const { return m_buffer; } + ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } + ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } + ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } + ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } + ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } - bool Create(id device, u32 size); - void Destroy(); + bool Create(id device, u32 size); + void Destroy(); - bool ReserveMemory(u32 num_bytes, u32 alignment); - void CommitMemory(u32 final_num_bytes); + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); private: - bool AllocateBuffer(u32 size); - void UpdateCurrentFencePosition(); - void UpdateGPUPosition(); + bool AllocateBuffer(u32 size); + void UpdateCurrentFencePosition(); + void UpdateGPUPosition(); - // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. - bool WaitForClearSpace(u32 num_bytes); + // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. + bool WaitForClearSpace(u32 num_bytes); - u32 m_size = 0; - u32 m_current_offset = 0; - u32 m_current_space = 0; - u32 m_current_gpu_position = 0; + u32 m_size = 0; + u32 m_current_offset = 0; + u32 m_current_space = 0; + u32 m_current_gpu_position = 0; - id m_buffer = nil; - u8* m_host_pointer = nullptr; + id m_buffer = nil; + u8* m_host_pointer = nullptr; - // List of fences and the corresponding positions in the buffer - std::deque> m_tracked_fences; + // List of fences and the corresponding positions in the buffer + std::deque> m_tracked_fences; }; diff --git a/src/core/gpu/metal_stream_buffer.mm b/src/core/gpu/metal_stream_buffer.mm index 7aa4d75f6..7d6bce3b6 100644 --- a/src/core/gpu/metal_stream_buffer.mm +++ b/src/core/gpu/metal_stream_buffer.mm @@ -14,240 +14,242 @@ MetalStreamBuffer::MetalStreamBuffer() = default; MetalStreamBuffer::~MetalStreamBuffer() { - if (IsValid()) - Destroy(); + if (IsValid()) + Destroy(); } bool MetalStreamBuffer::Create(id device, u32 size) -{ @autoreleasepool { - const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; - - id new_buffer = [device newBufferWithLength:size options:options]; - if (new_buffer == nil) - { - Log_ErrorPrintf("Failed to create buffer."); - return false; - } - - if (IsValid()) - Destroy(); - - // Replace with the new buffer - m_size = size; - m_current_offset = 0; - m_current_gpu_position = 0; - m_tracked_fences.clear(); - m_buffer = [new_buffer retain]; - m_host_pointer = static_cast([new_buffer contents]); - return true; -} } +{ + @autoreleasepool + { + const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + + id new_buffer = [device newBufferWithLength:size options:options]; + if (new_buffer == nil) + { + Log_ErrorPrintf("Failed to create buffer."); + return false; + } + + if (IsValid()) + Destroy(); + + // Replace with the new buffer + m_size = size; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + m_buffer = [new_buffer retain]; + m_host_pointer = static_cast([new_buffer contents]); + return true; + } +} void MetalStreamBuffer::Destroy() { - m_size = 0; - m_current_offset = 0; - m_current_gpu_position = 0; - m_tracked_fences.clear(); - [m_buffer release]; - m_buffer = nil; - m_host_pointer = nullptr; + m_size = 0; + m_current_offset = 0; + m_current_gpu_position = 0; + m_tracked_fences.clear(); + [m_buffer release]; + m_buffer = nil; + m_host_pointer = nullptr; } bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) { - const u32 required_bytes = num_bytes + alignment; + const u32 required_bytes = num_bytes + alignment; - // Check for sane allocations - if (required_bytes > m_size) - { - Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast(num_bytes), - static_cast(m_size)); - Panic("Stream buffer overflow"); - return false; - } + // Check for sane allocations + if (required_bytes > m_size) + { + Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast(num_bytes), + static_cast(m_size)); + Panic("Stream buffer overflow"); + return false; + } - UpdateGPUPosition(); + UpdateGPUPosition(); - // Is the GPU behind or up to date with our current offset? - if (m_current_offset >= m_current_gpu_position) - { - const u32 remaining_bytes = m_size - m_current_offset; - if (required_bytes <= remaining_bytes) - { - // Place at the current position, after the GPU position. - m_current_offset = Common::AlignUp(m_current_offset, alignment); - m_current_space = m_size - m_current_offset; - return true; - } + // Is the GPU behind or up to date with our current offset? + if (m_current_offset >= m_current_gpu_position) + { + const u32 remaining_bytes = m_size - m_current_offset; + if (required_bytes <= remaining_bytes) + { + // Place at the current position, after the GPU position. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_size - m_current_offset; + return true; + } - // Check for space at the start of the buffer - // We use < here because we don't want to have the case of m_current_offset == - // m_current_gpu_position. That would mean the code above would assume the - // GPU has caught up to us, which it hasn't. - if (required_bytes < m_current_gpu_position) - { - // Reset offset to zero, since we're allocating behind the gpu now - m_current_offset = 0; - m_current_space = m_current_gpu_position - 1; - return true; - } - } + // Check for space at the start of the buffer + // We use < here because we don't want to have the case of m_current_offset == + // m_current_gpu_position. That would mean the code above would assume the + // GPU has caught up to us, which it hasn't. + if (required_bytes < m_current_gpu_position) + { + // Reset offset to zero, since we're allocating behind the gpu now + m_current_offset = 0; + m_current_space = m_current_gpu_position - 1; + return true; + } + } - // Is the GPU ahead of our current offset? - if (m_current_offset < m_current_gpu_position) - { - // We have from m_current_offset..m_current_gpu_position space to use. - const u32 remaining_bytes = m_current_gpu_position - m_current_offset; - if (required_bytes < remaining_bytes) - { - // Place at the current position, since this is still behind the GPU. - m_current_offset = Common::AlignUp(m_current_offset, alignment); - m_current_space = m_current_gpu_position - m_current_offset - 1; - return true; - } - } + // Is the GPU ahead of our current offset? + if (m_current_offset < m_current_gpu_position) + { + // We have from m_current_offset..m_current_gpu_position space to use. + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; + if (required_bytes < remaining_bytes) + { + // Place at the current position, since this is still behind the GPU. + m_current_offset = Common::AlignUp(m_current_offset, alignment); + m_current_space = m_current_gpu_position - m_current_offset - 1; + return true; + } + } - // Can we find a fence to wait on that will give us enough memory? - if (WaitForClearSpace(required_bytes)) - { - const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; - m_current_offset += align_diff; - m_current_space -= align_diff; - return true; - } + // Can we find a fence to wait on that will give us enough memory? + if (WaitForClearSpace(required_bytes)) + { + const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; + m_current_offset += align_diff; + m_current_space -= align_diff; + return true; + } - // We tried everything we could, and still couldn't get anything. This means that too much space - // in the buffer is being used by the command buffer currently being recorded. Therefore, the - // only option is to execute it, and wait until it's done. - return false; + // We tried everything we could, and still couldn't get anything. This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. + return false; } void MetalStreamBuffer::CommitMemory(u32 final_num_bytes) { - DebugAssert((m_current_offset + final_num_bytes) <= m_size); - DebugAssert(final_num_bytes <= m_current_space); + DebugAssert((m_current_offset + final_num_bytes) <= m_size); + DebugAssert(final_num_bytes <= m_current_space); - m_current_offset += final_num_bytes; - m_current_space -= final_num_bytes; - UpdateCurrentFencePosition(); + m_current_offset += final_num_bytes; + m_current_space -= final_num_bytes; + UpdateCurrentFencePosition(); } void MetalStreamBuffer::UpdateCurrentFencePosition() { - // Has the offset changed since the last fence? - const u64 counter = MetalDevice::GetCurrentFenceCounter(); - if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) - { - // Still haven't executed a command buffer, so just update the offset. - m_tracked_fences.back().second = m_current_offset; - return; - } + // Has the offset changed since the last fence? + const u64 counter = MetalDevice::GetCurrentFenceCounter(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) + { + // Still haven't executed a command buffer, so just update the offset. + m_tracked_fences.back().second = m_current_offset; + return; + } - // New buffer, so update the GPU position while we're at it. - m_tracked_fences.emplace_back(counter, m_current_offset); + // New buffer, so update the GPU position while we're at it. + m_tracked_fences.emplace_back(counter, m_current_offset); } void MetalStreamBuffer::UpdateGPUPosition() { - auto start = m_tracked_fences.begin(); - auto end = start; + auto start = m_tracked_fences.begin(); + auto end = start; - const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); - while (end != m_tracked_fences.end() && completed_counter >= end->first) - { - m_current_gpu_position = end->second; - ++end; - } + const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); + while (end != m_tracked_fences.end() && completed_counter >= end->first) + { + m_current_gpu_position = end->second; + ++end; + } - if (start != end) - { - m_tracked_fences.erase(start, end); - if (m_current_offset == m_current_gpu_position) - { - // GPU is all caught up now. - m_current_offset = 0; - m_current_gpu_position = 0; - m_current_space = m_size; - } - } + if (start != end) + { + m_tracked_fences.erase(start, end); + if (m_current_offset == m_current_gpu_position) + { + // GPU is all caught up now. + m_current_offset = 0; + m_current_gpu_position = 0; + m_current_space = m_size; + } + } } bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes) { - u32 new_offset = 0; - u32 new_space = 0; - u32 new_gpu_position = 0; + u32 new_offset = 0; + u32 new_space = 0; + u32 new_gpu_position = 0; - auto iter = m_tracked_fences.begin(); - for (; iter != m_tracked_fences.end(); ++iter) - { - // Would this fence bring us in line with the GPU? - // This is the "last resort" case, where a command buffer execution has been forced - // after no additional data has been written to it, so we can assume that after the - // fence has been signaled the entire buffer is now consumed. - u32 gpu_position = iter->second; - if (m_current_offset == gpu_position) - { - new_offset = 0; - new_space = m_size; - new_gpu_position = 0; - break; - } + auto iter = m_tracked_fences.begin(); + for (; iter != m_tracked_fences.end(); ++iter) + { + // Would this fence bring us in line with the GPU? + // This is the "last resort" case, where a command buffer execution has been forced + // after no additional data has been written to it, so we can assume that after the + // fence has been signaled the entire buffer is now consumed. + u32 gpu_position = iter->second; + if (m_current_offset == gpu_position) + { + new_offset = 0; + new_space = m_size; + new_gpu_position = 0; + break; + } - // Assuming that we wait for this fence, are we allocating in front of the GPU? - if (m_current_offset > gpu_position) - { - // This would suggest the GPU has now followed us and wrapped around, so we have from - // m_current_position..m_size free, as well as and 0..gpu_position. - const u32 remaining_space_after_offset = m_size - m_current_offset; - if (remaining_space_after_offset >= num_bytes) - { - // Switch to allocating in front of the GPU, using the remainder of the buffer. - new_offset = m_current_offset; - new_space = m_size - m_current_offset; - new_gpu_position = gpu_position; - break; - } + // Assuming that we wait for this fence, are we allocating in front of the GPU? + if (m_current_offset > gpu_position) + { + // This would suggest the GPU has now followed us and wrapped around, so we have from + // m_current_position..m_size free, as well as and 0..gpu_position. + const u32 remaining_space_after_offset = m_size - m_current_offset; + if (remaining_space_after_offset >= num_bytes) + { + // Switch to allocating in front of the GPU, using the remainder of the buffer. + new_offset = m_current_offset; + new_space = m_size - m_current_offset; + new_gpu_position = gpu_position; + break; + } - // We can wrap around to the start, behind the GPU, if there is enough space. - // We use > here because otherwise we'd end up lining up with the GPU, and then the - // allocator would assume that the GPU has consumed what we just wrote. - if (gpu_position > num_bytes) - { - new_offset = 0; - new_space = gpu_position - 1; - new_gpu_position = gpu_position; - break; - } - } - else - { - // We're currently allocating behind the GPU. This would give us between the current - // offset and the GPU position worth of space to work with. Again, > because we can't - // align the GPU position with the buffer offset. - u32 available_space_inbetween = gpu_position - m_current_offset; - if (available_space_inbetween > num_bytes) - { - // Leave the offset as-is, but update the GPU position. - new_offset = m_current_offset; - new_space = available_space_inbetween - 1; - new_gpu_position = gpu_position; - break; - } - } - } + // We can wrap around to the start, behind the GPU, if there is enough space. + // We use > here because otherwise we'd end up lining up with the GPU, and then the + // allocator would assume that the GPU has consumed what we just wrote. + if (gpu_position > num_bytes) + { + new_offset = 0; + new_space = gpu_position - 1; + new_gpu_position = gpu_position; + break; + } + } + else + { + // We're currently allocating behind the GPU. This would give us between the current + // offset and the GPU position worth of space to work with. Again, > because we can't + // align the GPU position with the buffer offset. + u32 available_space_inbetween = gpu_position - m_current_offset; + if (available_space_inbetween > num_bytes) + { + // Leave the offset as-is, but update the GPU position. + new_offset = m_current_offset; + new_space = available_space_inbetween - 1; + new_gpu_position = gpu_position; + break; + } + } + } - // Did any fences satisfy this condition? - // Has the command buffer been executed yet? If not, the caller should execute it. - if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) - return false; + // Did any fences satisfy this condition? + // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) + return false; - // Wait until this fence is signaled. This will fire the callback, updating the GPU position. - MetalDevice::GetInstance().WaitForFenceCounter(iter->first); - m_tracked_fences.erase( - m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); - m_current_offset = new_offset; - m_current_space = new_space; - m_current_gpu_position = new_gpu_position; - return true; + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. + MetalDevice::GetInstance().WaitForFenceCounter(iter->first); + m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); + m_current_offset = new_offset; + m_current_space = new_space; + m_current_gpu_position = new_gpu_position; + return true; } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 8a87c6bad..0a2e610a3 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2083,8 +2083,8 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Encode the 24-bit texture as 16-bit. const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get()); + g_gpu_device->SetPipeline(m_vram_readback_pipeline.get()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));