More Metal work

This commit is contained in:
Stenzek 2023-08-06 21:59:38 +10:00
parent 716cd50a22
commit 9f04712dd3
9 changed files with 1829 additions and 1671 deletions

View File

@ -105,4 +105,111 @@ Standard: Cpp11
TabWidth: 2 TabWidth: 2
UseTab: Never UseTab: Never
... ...
---
Language: ObjC
AccessModifierOffset: -2
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: InlineOnly
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: true
AfterClass: true
AfterControlStatement: true
AfterEnum: true
AfterFunction: true
AfterNamespace: false
AfterObjCDeclaration: true
AfterStruct: true
AfterUnion: true
BeforeCatch: true
BeforeElse: true
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Custom
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 2
UseTab: Never
...

View File

@ -470,7 +470,7 @@ void GPUDevice::RenderImGui()
const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
DebugAssert(!pcmd->UserCallback); DebugAssert(!pcmd->UserCallback);
if (pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x) if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x)
continue; continue;
SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y), SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y),

View File

@ -476,8 +476,8 @@ public:
virtual RenderAPI GetRenderAPI() const = 0; virtual RenderAPI GetRenderAPI() const = 0;
bool Create(const std::string_view& adapter, const std::string_view& shader_cache_path, bool Create(const std::string_view& adapter, const std::string_view& shader_cache_path, bool debug_device,
bool debug_device, bool vsync); bool vsync);
void Destroy(); void Destroy();
virtual bool HasSurface() const = 0; virtual bool HasSurface() const = 0;

View File

@ -28,7 +28,7 @@ void GPUTexture::ClearBaseProperties()
m_state = State::Dirty; m_state = State::Dirty;
} }
u32 GPUTexture::GPUTexture::GetPixelSize(GPUTexture::Format format) u32 GPUTexture::GetPixelSize(GPUTexture::Format format)
{ {
switch (format) switch (format)
{ {
@ -59,7 +59,7 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
{ {
switch (format) switch (format)
{ {
case GPUTexture::Format::BGRA8: case Format::BGRA8:
{ {
for (u32 y = 0; y < height; y++) for (u32 y = 0; y < height; y++)
{ {
@ -71,10 +71,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
return true; return true;
} }
case GPUTexture::Format::RGBA8: case Format::RGBA8:
return true; return true;
case GPUTexture::Format::RGB565: case Format::RGB565:
{ {
std::vector<u32> temp(width * height); std::vector<u32> temp(width * height);
@ -102,7 +102,7 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
return true; return true;
} }
case GPUTexture::Format::RGBA5551: case Format::RGBA5551:
{ {
std::vector<u32> temp(width * height); std::vector<u32> temp(width * height);

View File

@ -17,6 +17,7 @@
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <unordered_map> #include <unordered_map>
#include <utility>
#include <vector> #include <vector>
#include <Metal/Metal.h> #include <Metal/Metal.h>
@ -42,14 +43,14 @@ class MetalSampler final : public GPUSampler
public: public:
~MetalSampler() override; ~MetalSampler() override;
ALWAYS_INLINE id<MTLSamplerState> GetSamplerState() const { return m_ss; } ALWAYS_INLINE id<MTLSamplerState> GetSamplerState() const { return m_ss; }
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
private: private:
MetalSampler(id<MTLSamplerState> ss); MetalSampler(id<MTLSamplerState> ss);
id<MTLSamplerState> m_ss; id<MTLSamplerState> m_ss;
}; };
class MetalShader final : public GPUShader class MetalShader final : public GPUShader
@ -59,16 +60,16 @@ class MetalShader final : public GPUShader
public: public:
~MetalShader() override; ~MetalShader() override;
ALWAYS_INLINE id<MTLLibrary> GetLibrary() const { return m_library; } ALWAYS_INLINE id<MTLLibrary> GetLibrary() const { return m_library; }
ALWAYS_INLINE id<MTLFunction> GetFunction() const { return m_function; } ALWAYS_INLINE id<MTLFunction> GetFunction() const { return m_function; }
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
private: private:
MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function); MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function);
id<MTLLibrary> m_library; id<MTLLibrary> m_library;
id<MTLFunction> m_function; id<MTLFunction> m_function;
}; };
class MetalPipeline final : public GPUPipeline class MetalPipeline final : public GPUPipeline
@ -77,21 +78,22 @@ class MetalPipeline final : public GPUPipeline
public: public:
~MetalPipeline() override; ~MetalPipeline() override;
ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; } ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; }
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; } ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
private: private:
MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive); MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
MTLPrimitiveType primitive);
id<MTLRenderPipelineState> m_pipeline;
id<MTLDepthStencilState> m_depth; id<MTLRenderPipelineState> m_pipeline;
MTLCullMode m_cull_mode; id<MTLDepthStencilState> m_depth;
MTLPrimitiveType m_primitive; MTLCullMode m_cull_mode;
MTLPrimitiveType m_primitive;
}; };
class MetalTexture final : public GPUTexture class MetalTexture final : public GPUTexture
@ -101,7 +103,7 @@ class MetalTexture final : public GPUTexture
public: public:
~MetalTexture(); ~MetalTexture();
ALWAYS_INLINE id<MTLTexture> GetMTLTexture() const { return m_texture; } ALWAYS_INLINE id<MTLTexture> GetMTLTexture() const { return m_texture; }
bool Create(id<MTLDevice> device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, bool Create(id<MTLDevice> device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type,
Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0); Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0);
@ -113,72 +115,69 @@ public:
bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override;
void Unmap() override; void Unmap() override;
void MakeReadyForSampling() override;
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
private: private:
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
Format format); Format format);
id<MTLTexture> m_texture;
u16 m_map_x = 0; id<MTLTexture> m_texture;
u16 m_map_y = 0;
u16 m_map_x = 0;
u16 m_map_y = 0;
u16 m_map_width = 0; u16 m_map_width = 0;
u16 m_map_height = 0; u16 m_map_height = 0;
u8 m_map_layer = 0; u8 m_map_layer = 0;
u8 m_map_level = 0; u8 m_map_level = 0;
}; };
#if 0
class MetalTextureBuffer final : public GPUTextureBuffer class MetalTextureBuffer final : public GPUTextureBuffer
{ {
public: public:
MetalTextureBuffer(Format format, u32 size_in_elements); MetalTextureBuffer(Format format, u32 size_in_elements);
~MetalTextureBuffer() override; ~MetalTextureBuffer() override;
ALWAYS_INLINE IMetalBuffer* GetBuffer() const { return m_buffer.GetD3DBuffer(); } ALWAYS_INLINE id<MTLBuffer> GetMTLBuffer() const { return m_buffer.GetBuffer(); }
ALWAYS_INLINE IMetalShaderResourceView* GetSRV() const { return m_srv.Get(); }
ALWAYS_INLINE IMetalShaderResourceView* const* GetSRVArray() const { return m_srv.GetAddressOf(); }
bool CreateBuffer(IMetalDevice* device); bool CreateBuffer(id<MTLDevice> device);
// Inherited via GPUTextureBuffer // Inherited via GPUTextureBuffer
virtual void* Map(u32 required_elements) override; void* Map(u32 required_elements) override;
virtual void Unmap(u32 used_elements) override; void Unmap(u32 used_elements) override;
private: private:
MetalStreamBuffer m_buffer; MetalStreamBuffer m_buffer;
Microsoft::WRL::ComPtr<IMetalShaderResourceView> m_srv;
}; };
#endif
class MetalFramebuffer final : public GPUFramebuffer class MetalFramebuffer final : public GPUFramebuffer
{ {
friend MetalDevice; friend MetalDevice;
public: public:
~MetalFramebuffer() override; ~MetalFramebuffer() override;
MTLRenderPassDescriptor* GetDescriptor() const;
void SetDebugName(const std::string_view& name) override; MTLRenderPassDescriptor* GetDescriptor() const;
void SetDebugName(const std::string_view& name) override;
private: private:
MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id<MTLTexture> rt_tex, id<MTLTexture> ds_tex, MetalFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, id<MTLTexture> rt_tex, id<MTLTexture> ds_tex,
MTLRenderPassDescriptor* descriptor); MTLRenderPassDescriptor* descriptor);
id<MTLTexture> m_rt_tex; id<MTLTexture> m_rt_tex;
id<MTLTexture> m_ds_tex; id<MTLTexture> m_ds_tex;
MTLRenderPassDescriptor* m_descriptor; MTLRenderPassDescriptor* m_descriptor;
}; };
class MetalDevice final : public GPUDevice class MetalDevice final : public GPUDevice
{ {
public: public:
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_host_display.get()); } ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; } ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; }
ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; }
ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; }
MetalDevice(); MetalDevice();
~MetalDevice(); ~MetalDevice();
@ -187,17 +186,9 @@ public:
bool HasSurface() const override; bool HasSurface() const override;
bool CreateDevice(const WindowInfo& wi, bool vsync) override; bool UpdateWindow() override;
bool SetupDevice() override; void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override;
bool MakeCurrent() override;
bool DoneCurrent() override;
bool ChangeWindow(const WindowInfo& new_wi) override;
void ResizeWindow(s32 new_window_width, s32 new_window_height) override;
bool SupportsFullscreen() const override;
bool IsFullscreen() override;
bool SetFullscreen(bool fullscreen, u32 width, u32 height, float refresh_rate) override;
AdapterAndModeList GetAdapterAndModeList() override; AdapterAndModeList GetAdapterAndModeList() override;
void DestroySurface() override; void DestroySurface() override;
@ -258,59 +249,73 @@ public:
bool BeginPresent(bool skip_present) override; bool BeginPresent(bool skip_present) override;
void EndPresent() override; void EndPresent() override;
void WaitForFenceCounter(u64 counter); void WaitForFenceCounter(u64 counter);
ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; } ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; }
id<MTLBlitCommandEncoder> GetTextureUploadEncoder(bool is_inline); id<MTLBlitCommandEncoder> GetBlitEncoder(bool is_inline);
void SubmitCommandBuffer(); void SubmitCommandBuffer(bool wait_for_completion = false);
void SubmitCommandBufferAndRestartRenderPass(const char* reason); void SubmitCommandBufferAndRestartRenderPass(const char* reason);
void CommitClear(MetalTexture* tex);
void UnbindFramebuffer(MetalFramebuffer* fb); void UnbindFramebuffer(MetalFramebuffer* fb);
void UnbindFramebuffer(MetalTexture* tex);
void UnbindPipeline(MetalPipeline* pl); void UnbindPipeline(MetalPipeline* pl);
void UnbindTexture(MetalTexture* tex); void UnbindTexture(MetalTexture* tex);
void UnbindTextureBuffer(MetalTextureBuffer* buf);
static void DeferRelease(id obj);
static void DeferRelease(u64 fence_counter, id obj);
static AdapterAndModeList StaticGetAdapterAndModeList(); static AdapterAndModeList StaticGetAdapterAndModeList();
protected:
bool CreateDevice(const std::string_view& adapter, bool debug_device) override;
void DestroyDevice() override;
private: private:
static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32/*16*/ * 1024 * 1024; // TODO reduce after separate allocations static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; }
void SetFeatures(); void SetFeatures();
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source, const std::string_view& entry_point);
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
void CreateCommandBuffer();
void CommandBufferCompleted(u64 fence_counter);
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
void BeginRenderPass();
void EndRenderPass();
void EndInlineUploading();
void EndAnyEncoding();
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, const std::string_view& source,
const std::string_view& entry_point);
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
void CreateCommandBuffer();
void CommandBufferCompletedOffThread(u64 fence_counter);
void WaitForPreviousCommandBuffers();
void CleanupObjects();
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
void BeginRenderPass();
void EndRenderPass();
void EndInlineUploading();
void EndAnyEncoding();
Common::Rectangle<s32> ClampToFramebufferSize(const Common::Rectangle<s32>& rc) const;
void PreDrawCheck(); void PreDrawCheck();
void SetInitialEncoderState(); void SetInitialEncoderState();
void SetUniformBufferInRenderEncoder(); void SetUniformBufferInRenderEncoder();
void SetViewportInRenderEncoder(); void SetViewportInRenderEncoder();
void SetScissorInRenderEncoder(); void SetScissorInRenderEncoder();
//bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); bool CheckDownloadBufferSize(u32 required_size);
//void DestroyStagingBuffer();
bool CreateLayer(); bool CreateLayer();
void DestroyLayer(); void DestroyLayer();
bool CreateBuffers(); bool CreateBuffers();
void DestroyBuffers(); void DestroyBuffers();
@ -320,54 +325,54 @@ private:
void PopTimestampQuery(); void PopTimestampQuery();
void KickTimestampQuery(); void KickTimestampQuery();
id<MTLDevice> m_device; id<MTLDevice> m_device;
id<MTLCommandQueue> m_queue; id<MTLCommandQueue> m_queue;
CAMetalLayer* m_layer = nil;
id<MTLDrawable> m_layer_drawable = nil;
MTLRenderPassDescriptor* m_layer_pass_desc = nil;
std::mutex m_fence_mutex;
u64 m_current_fence_counter = 0;
std::atomic<u64> m_completed_fence_counter{0};
DepthStateMap m_depth_states;
// ComPtr<IMetalTexture2D> m_readback_staging_texture; CAMetalLayer* m_layer = nil;
// DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; id<MTLDrawable> m_layer_drawable = nil;
// u32 m_readback_staging_texture_width = 0; MTLRenderPassDescriptor* m_layer_pass_desc = nil;
// u32 m_readback_staging_texture_height = 0;
std::mutex m_fence_mutex;
u64 m_current_fence_counter = 0;
std::atomic<u64> m_completed_fence_counter{0};
std::deque<std::pair<u64, id>> m_cleanup_objects; // [fence_counter, object]
DepthStateMap m_depth_states;
id<MTLBuffer> m_download_buffer = nil;
u32 m_download_buffer_size = 0;
MetalStreamBuffer m_vertex_buffer; MetalStreamBuffer m_vertex_buffer;
MetalStreamBuffer m_index_buffer; MetalStreamBuffer m_index_buffer;
MetalStreamBuffer m_uniform_buffer; MetalStreamBuffer m_uniform_buffer;
MetalStreamBuffer m_texture_upload_buffer; MetalStreamBuffer m_texture_upload_buffer;
id<MTLCommandBuffer> m_upload_cmdbuf = nil; id<MTLCommandBuffer> m_upload_cmdbuf = nil;
id<MTLBlitCommandEncoder> m_upload_encoder = nil; id<MTLBlitCommandEncoder> m_upload_encoder = nil;
id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil; id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil;
id<MTLCommandBuffer> m_render_cmdbuf = nil; id<MTLCommandBuffer> m_render_cmdbuf = nil;
id<MTLRenderCommandEncoder> m_render_encoder = nil; id<MTLRenderCommandEncoder> m_render_encoder = nil;
MetalFramebuffer* m_current_framebuffer = nullptr; MetalFramebuffer* m_current_framebuffer = nullptr;
MetalPipeline* m_current_pipeline = nullptr; MetalPipeline* m_current_pipeline = nullptr;
id<MTLDepthStencilState> m_current_depth_state = nil; id<MTLDepthStencilState> m_current_depth_state = nil;
MTLCullMode m_current_cull_mode = MTLCullModeNone; MTLCullMode m_current_cull_mode = MTLCullModeNone;
u32 m_current_uniform_buffer_position = 0; u32 m_current_uniform_buffer_position = 0;
std::array<id<MTLTexture>, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; std::array<id<MTLTexture>, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
std::array<id<MTLSamplerState>, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; std::array<id<MTLSamplerState>, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
Common::Rectangle<s32> m_current_viewport = {}; id<MTLBuffer> m_current_ssbo = nil;
Common::Rectangle<s32> m_current_scissor = {}; Common::Rectangle<s32> m_current_viewport = {};
Common::Rectangle<s32> m_current_scissor = {};
bool m_vsync_enabled = false;
// std::array<std::array<ComPtr<IMetalQuery>, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; bool m_vsync_enabled = false;
// u8 m_read_timestamp_query = 0;
// u8 m_write_timestamp_query = 0; // std::array<std::array<ComPtr<IMetalQuery>, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {};
// u8 m_waiting_timestamp_queries = 0; // u8 m_read_timestamp_query = 0;
// bool m_timestamp_query_started = false; // u8 m_write_timestamp_query = 0;
// float m_accumulated_gpu_time = 0.0f; // u8 m_waiting_timestamp_queries = 0;
// bool m_timestamp_query_started = false;
// float m_accumulated_gpu_time = 0.0f;
}; };

File diff suppressed because it is too large Load Diff

View File

@ -22,44 +22,44 @@
class MetalStreamBuffer class MetalStreamBuffer
{ {
public: public:
MetalStreamBuffer(); MetalStreamBuffer();
MetalStreamBuffer(MetalStreamBuffer&& move) = delete; MetalStreamBuffer(MetalStreamBuffer&& move) = delete;
MetalStreamBuffer(const MetalStreamBuffer&) = delete; MetalStreamBuffer(const MetalStreamBuffer&) = delete;
~MetalStreamBuffer(); ~MetalStreamBuffer();
MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete; MetalStreamBuffer& operator=(MetalStreamBuffer&& move) = delete;
MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete; MetalStreamBuffer& operator=(const MetalStreamBuffer&) = delete;
ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); } ALWAYS_INLINE bool IsValid() const { return (m_buffer != nil); }
ALWAYS_INLINE id<MTLBuffer> GetBuffer() const { return m_buffer; } ALWAYS_INLINE id<MTLBuffer> GetBuffer() const { return m_buffer; }
ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; } ALWAYS_INLINE u8* GetHostPointer() const { return m_host_pointer; }
ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } ALWAYS_INLINE u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; }
ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; } ALWAYS_INLINE u32 GetCurrentSize() const { return m_size; }
ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; } ALWAYS_INLINE u32 GetCurrentSpace() const { return m_current_space; }
ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; } ALWAYS_INLINE u32 GetCurrentOffset() const { return m_current_offset; }
bool Create(id<MTLDevice> device, u32 size); bool Create(id<MTLDevice> device, u32 size);
void Destroy(); void Destroy();
bool ReserveMemory(u32 num_bytes, u32 alignment); bool ReserveMemory(u32 num_bytes, u32 alignment);
void CommitMemory(u32 final_num_bytes); void CommitMemory(u32 final_num_bytes);
private: private:
bool AllocateBuffer(u32 size); bool AllocateBuffer(u32 size);
void UpdateCurrentFencePosition(); void UpdateCurrentFencePosition();
void UpdateGPUPosition(); void UpdateGPUPosition();
// Waits for as many fences as needed to allocate num_bytes bytes from the buffer. // Waits for as many fences as needed to allocate num_bytes bytes from the buffer.
bool WaitForClearSpace(u32 num_bytes); bool WaitForClearSpace(u32 num_bytes);
u32 m_size = 0; u32 m_size = 0;
u32 m_current_offset = 0; u32 m_current_offset = 0;
u32 m_current_space = 0; u32 m_current_space = 0;
u32 m_current_gpu_position = 0; u32 m_current_gpu_position = 0;
id<MTLBuffer> m_buffer = nil; id<MTLBuffer> m_buffer = nil;
u8* m_host_pointer = nullptr; u8* m_host_pointer = nullptr;
// List of fences and the corresponding positions in the buffer // List of fences and the corresponding positions in the buffer
std::deque<std::pair<u64, u32>> m_tracked_fences; std::deque<std::pair<u64, u32>> m_tracked_fences;
}; };

View File

@ -14,240 +14,242 @@ MetalStreamBuffer::MetalStreamBuffer() = default;
MetalStreamBuffer::~MetalStreamBuffer() MetalStreamBuffer::~MetalStreamBuffer()
{ {
if (IsValid()) if (IsValid())
Destroy(); Destroy();
} }
bool MetalStreamBuffer::Create(id<MTLDevice> device, u32 size) bool MetalStreamBuffer::Create(id<MTLDevice> device, u32 size)
{ @autoreleasepool { {
const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; @autoreleasepool
{
id<MTLBuffer> new_buffer = [device newBufferWithLength:size options:options]; const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
if (new_buffer == nil)
{ id<MTLBuffer> new_buffer = [device newBufferWithLength:size options:options];
Log_ErrorPrintf("Failed to create buffer."); if (new_buffer == nil)
return false; {
} Log_ErrorPrintf("Failed to create buffer.");
return false;
if (IsValid()) }
Destroy();
if (IsValid())
// Replace with the new buffer Destroy();
m_size = size;
m_current_offset = 0; // Replace with the new buffer
m_current_gpu_position = 0; m_size = size;
m_tracked_fences.clear(); m_current_offset = 0;
m_buffer = [new_buffer retain]; m_current_gpu_position = 0;
m_host_pointer = static_cast<u8*>([new_buffer contents]); m_tracked_fences.clear();
return true; m_buffer = [new_buffer retain];
} } m_host_pointer = static_cast<u8*>([new_buffer contents]);
return true;
}
}
void MetalStreamBuffer::Destroy() void MetalStreamBuffer::Destroy()
{ {
m_size = 0; m_size = 0;
m_current_offset = 0; m_current_offset = 0;
m_current_gpu_position = 0; m_current_gpu_position = 0;
m_tracked_fences.clear(); m_tracked_fences.clear();
[m_buffer release]; [m_buffer release];
m_buffer = nil; m_buffer = nil;
m_host_pointer = nullptr; m_host_pointer = nullptr;
} }
bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{ {
const u32 required_bytes = num_bytes + alignment; const u32 required_bytes = num_bytes + alignment;
// Check for sane allocations // Check for sane allocations
if (required_bytes > m_size) if (required_bytes > m_size)
{ {
Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast<u32>(num_bytes), Log_ErrorPrintf("Attempting to allocate %u bytes from a %u byte stream buffer", static_cast<u32>(num_bytes),
static_cast<u32>(m_size)); static_cast<u32>(m_size));
Panic("Stream buffer overflow"); Panic("Stream buffer overflow");
return false; return false;
} }
UpdateGPUPosition(); UpdateGPUPosition();
// Is the GPU behind or up to date with our current offset? // Is the GPU behind or up to date with our current offset?
if (m_current_offset >= m_current_gpu_position) if (m_current_offset >= m_current_gpu_position)
{ {
const u32 remaining_bytes = m_size - m_current_offset; const u32 remaining_bytes = m_size - m_current_offset;
if (required_bytes <= remaining_bytes) if (required_bytes <= remaining_bytes)
{ {
// Place at the current position, after the GPU position. // Place at the current position, after the GPU position.
m_current_offset = Common::AlignUp(m_current_offset, alignment); m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_current_space = m_size - m_current_offset; m_current_space = m_size - m_current_offset;
return true; return true;
} }
// Check for space at the start of the buffer // Check for space at the start of the buffer
// We use < here because we don't want to have the case of m_current_offset == // We use < here because we don't want to have the case of m_current_offset ==
// m_current_gpu_position. That would mean the code above would assume the // m_current_gpu_position. That would mean the code above would assume the
// GPU has caught up to us, which it hasn't. // GPU has caught up to us, which it hasn't.
if (required_bytes < m_current_gpu_position) if (required_bytes < m_current_gpu_position)
{ {
// Reset offset to zero, since we're allocating behind the gpu now // Reset offset to zero, since we're allocating behind the gpu now
m_current_offset = 0; m_current_offset = 0;
m_current_space = m_current_gpu_position - 1; m_current_space = m_current_gpu_position - 1;
return true; return true;
} }
} }
// Is the GPU ahead of our current offset? // Is the GPU ahead of our current offset?
if (m_current_offset < m_current_gpu_position) if (m_current_offset < m_current_gpu_position)
{ {
// We have from m_current_offset..m_current_gpu_position space to use. // We have from m_current_offset..m_current_gpu_position space to use.
const u32 remaining_bytes = m_current_gpu_position - m_current_offset; const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
if (required_bytes < remaining_bytes) if (required_bytes < remaining_bytes)
{ {
// Place at the current position, since this is still behind the GPU. // Place at the current position, since this is still behind the GPU.
m_current_offset = Common::AlignUp(m_current_offset, alignment); m_current_offset = Common::AlignUp(m_current_offset, alignment);
m_current_space = m_current_gpu_position - m_current_offset - 1; m_current_space = m_current_gpu_position - m_current_offset - 1;
return true; return true;
} }
} }
// Can we find a fence to wait on that will give us enough memory? // Can we find a fence to wait on that will give us enough memory?
if (WaitForClearSpace(required_bytes)) if (WaitForClearSpace(required_bytes))
{ {
const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset; const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
m_current_offset += align_diff; m_current_offset += align_diff;
m_current_space -= align_diff; m_current_space -= align_diff;
return true; return true;
} }
// We tried everything we could, and still couldn't get anything. This means that too much space // We tried everything we could, and still couldn't get anything. This means that too much space
// in the buffer is being used by the command buffer currently being recorded. Therefore, the // in the buffer is being used by the command buffer currently being recorded. Therefore, the
// only option is to execute it, and wait until it's done. // only option is to execute it, and wait until it's done.
return false; return false;
} }
// Finalizes a previously reserved allocation: advances the write offset by final_num_bytes,
// shrinks the remaining reserved space by the same amount, and records the new offset against
// the current fence.
void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
{
  // The committed range must fit both inside the buffer and inside the space that was reserved.
  const u32 end_offset = m_current_offset + final_num_bytes;
  DebugAssert(end_offset <= m_size);
  DebugAssert(final_num_bytes <= m_current_space);

  m_current_space -= final_num_bytes;
  m_current_offset = end_offset;

  UpdateCurrentFencePosition();
}
void MetalStreamBuffer::UpdateCurrentFencePosition() void MetalStreamBuffer::UpdateCurrentFencePosition()
{ {
// Has the offset changed since the last fence? // Has the offset changed since the last fence?
const u64 counter = MetalDevice::GetCurrentFenceCounter(); const u64 counter = MetalDevice::GetCurrentFenceCounter();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
{ {
// Still haven't executed a command buffer, so just update the offset. // Still haven't executed a command buffer, so just update the offset.
m_tracked_fences.back().second = m_current_offset; m_tracked_fences.back().second = m_current_offset;
return; return;
} }
// New buffer, so update the GPU position while we're at it. // New buffer, so update the GPU position while we're at it.
m_tracked_fences.emplace_back(counter, m_current_offset); m_tracked_fences.emplace_back(counter, m_current_offset);
} }
void MetalStreamBuffer::UpdateGPUPosition() void MetalStreamBuffer::UpdateGPUPosition()
{ {
auto start = m_tracked_fences.begin(); auto start = m_tracked_fences.begin();
auto end = start; auto end = start;
const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); const u64 completed_counter = MetalDevice::GetCompletedFenceCounter();
while (end != m_tracked_fences.end() && completed_counter >= end->first) while (end != m_tracked_fences.end() && completed_counter >= end->first)
{ {
m_current_gpu_position = end->second; m_current_gpu_position = end->second;
++end; ++end;
} }
if (start != end) if (start != end)
{ {
m_tracked_fences.erase(start, end); m_tracked_fences.erase(start, end);
if (m_current_offset == m_current_gpu_position) if (m_current_offset == m_current_gpu_position)
{ {
// GPU is all caught up now. // GPU is all caught up now.
m_current_offset = 0; m_current_offset = 0;
m_current_gpu_position = 0; m_current_gpu_position = 0;
m_current_space = m_size; m_current_space = m_size;
} }
} }
} }
// Searches the tracked fences, oldest first, for one whose completion would leave room for
// num_bytes. If such a fence exists and does not belong to the current fence counter, waits on
// it, drops the fences that are no longer needed, and updates the offset/space/GPU position.
//
// Returns true when space was made available, false when no suitable fence exists or the
// matching fence is the current one (in which case the caller has to submit work first).
bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 next_offset = 0;
  u32 next_space = 0;
  u32 next_gpu_position = 0;

  auto fence = m_tracked_fences.begin();
  while (fence != m_tracked_fences.end())
  {
    const u32 fence_position = fence->second;

    // "Last resort" case: this fence was recorded at exactly the current write offset, so once
    // it has been signaled the entire buffer can be treated as consumed.
    if (fence_position == m_current_offset)
    {
      next_offset = 0;
      next_space = m_size;
      next_gpu_position = 0;
      break;
    }

    if (fence_position < m_current_offset)
    {
      // The write offset is in front of this fence's position, so after waiting the free regions
      // are m_current_offset..m_size and 0..fence_position.
      const u32 tail_space = m_size - m_current_offset;
      if (num_bytes <= tail_space)
      {
        // Keep allocating in front of the GPU, using the remainder of the buffer.
        next_offset = m_current_offset;
        next_space = tail_space;
        next_gpu_position = fence_position;
        break;
      }

      // Otherwise wrap to the start, behind the GPU. The comparison is strict because an
      // allocation ending exactly at the GPU position would make the allocator assume the GPU
      // has already consumed what was just written.
      if (num_bytes < fence_position)
      {
        next_offset = 0;
        next_space = fence_position - 1;
        next_gpu_position = fence_position;
        break;
      }
    }
    else
    {
      // The write offset is behind this fence's position, so the usable range is the gap between
      // the two. Strict comparison again, so the offset never lines up with the GPU position.
      const u32 gap = fence_position - m_current_offset;
      if (num_bytes < gap)
      {
        next_offset = m_current_offset;
        next_space = gap - 1;
        next_gpu_position = fence_position;
        break;
      }
    }

    ++fence;
  }

  // No fence frees enough space.
  if (fence == m_tracked_fences.end())
    return false;

  // The matching fence is the current one, which has not been submitted yet; the caller should
  // execute the command buffer instead.
  if (fence->first == MetalDevice::GetCurrentFenceCounter())
    return false;

  // Wait on the selected fence before reusing the memory it guards.
  MetalDevice::GetInstance().WaitForFenceCounter(fence->first);

  // In the "last resort" case every tracked fence is dropped; otherwise everything up to and
  // including the selected fence goes.
  auto erase_end = m_tracked_fences.end();
  if (m_current_offset != fence->second)
    erase_end = ++fence;
  m_tracked_fences.erase(m_tracked_fences.begin(), erase_end);

  m_current_offset = next_offset;
  m_current_space = next_space;
  m_current_gpu_position = next_gpu_position;
  return true;
}

View File

@ -2083,8 +2083,8 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Encode the 24-bit texture as 16-bit. // Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
g_gpu_device->SetPipeline(m_vram_readback_pipeline.get());
g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get()); g_gpu_device->SetFramebuffer(m_vram_readback_framebuffer.get());
g_gpu_device->SetPipeline(m_vram_readback_pipeline.get());
g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height); g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height);
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));