VideoBackends:Metal: Remove unified memory config

Not worth the extra code
TellowKrinkle 2022-07-21 20:07:23 -05:00
parent 5065767abd
commit a41345127f
6 changed files with 21 additions and 165 deletions
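
The pattern the backend now uses unconditionally is a single shared-storage buffer that the CPU reads and writes directly, ordered against GPU work with MTLFences rather than staged through a private buffer and blit copies. A minimal sketch of that allocation pattern in plain Metal (MakeSharedUploadBuffer is a hypothetical name for illustration, not code from this commit):

#import <Metal/Metal.h>

// Sketch only: one shared-storage allocation is visible to both CPU and GPU,
// so no blit from a CPU staging buffer into a GPU private buffer is needed.
// Hazard tracking is disabled because the caller orders access with MTLFences.
static id<MTLBuffer> MakeSharedUploadBuffer(id<MTLDevice> device, NSUInteger length)
{
  const MTLResourceOptions options =
      MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
  id<MTLBuffer> buffer = [device newBufferWithLength:length options:options];
  // [buffer contents] is a CPU pointer into the same allocation; data written
  // here is visible to GPU work submitted afterwards.
  void* cpu_ptr = [buffer contents];
  (void)cpu_ptr;
  return buffer;
}

Apple GPUs have unified memory, so shared storage costs nothing there, and the comment removed from the last file below notes that the shared path also measured faster on discrete GPUs (20fps vs 15fps in the Super Paper Mario elevator), which is why the config toggle was dropped outright.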

Source/Core/VideoBackends/Metal/MTLBoundingBox.mm

@@ -16,28 +16,16 @@ Metal::BoundingBox::~BoundingBox()
 bool Metal::BoundingBox::Initialize()
 {
-  const MTLResourceOptions gpu_storage_mode =
-      g_features.unified_memory ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate;
-  const MTLResourceOptions gpu_options = gpu_storage_mode | MTLResourceHazardTrackingModeUntracked;
+  const MTLResourceOptions gpu_options =
+      MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked;
   const id<MTLDevice> dev = g_device;
   m_upload_fence = MRCTransfer([dev newFence]);
   [m_upload_fence setLabel:@"BBox Upload Fence"];
   m_download_fence = MRCTransfer([dev newFence]);
   [m_download_fence setLabel:@"BBox Download Fence"];
   m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]);
-  if (g_features.unified_memory)
-  {
-    [m_gpu_buffer setLabel:@"BBox Buffer"];
-    m_cpu_buffer_ptr = static_cast<BBoxType*>([m_gpu_buffer contents]);
-  }
-  else
-  {
-    m_cpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE
-                                                options:MTLResourceStorageModeShared]);
-    m_cpu_buffer_ptr = static_cast<BBoxType*>([m_cpu_buffer contents]);
-    [m_gpu_buffer setLabel:@"BBox GPU Buffer"];
-    [m_cpu_buffer setLabel:@"BBox CPU Buffer"];
-  }
+  [m_gpu_buffer setLabel:@"BBox Buffer"];
+  m_cpu_buffer_ptr = static_cast<BBoxType*>([m_gpu_buffer contents]);
   g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence);
   return true;
 }
@@ -47,18 +35,6 @@ std::vector<BBoxType> Metal::BoundingBox::Read(u32 index, u32 length)
   @autoreleasepool
   {
     g_state_tracker->EndRenderPass();
-    if (!g_features.unified_memory)
-    {
-      id<MTLBlitCommandEncoder> download = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder];
-      [download setLabel:@"BBox Download"];
-      [download waitForFence:m_download_fence];
-      [download copyFromBuffer:m_gpu_buffer
-                  sourceOffset:0
-                      toBuffer:m_cpu_buffer
-             destinationOffset:0
-                          size:BUFFER_SIZE];
-      [download endEncoding];
-    }
     g_state_tracker->FlushEncoders();
     g_state_tracker->WaitForFlushedEncoders();
     return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length);
@@ -68,8 +44,7 @@ std::vector<BBoxType> Metal::BoundingBox::Read(u32 index, u32 length)
 void Metal::BoundingBox::Write(u32 index, const std::vector<BBoxType>& values)
 {
   const u32 size = values.size() * sizeof(BBoxType);
-  if (g_features.unified_memory && !g_state_tracker->HasUnflushedData() &&
-      !g_state_tracker->GPUBusy())
+  if (!g_state_tracker->HasUnflushedData() && !g_state_tracker->GPUBusy())
   {
     // We can just write directly to the buffer!
     memcpy(m_cpu_buffer_ptr + index, values.data(), size);

Source/Core/VideoBackends/Metal/MTLStateTracker.h

@@ -34,6 +34,7 @@ public:
     Uniform,
     Vertex,
     Index,
+    TextureData,
     Texels,
     Last = Texels
   };
@@ -105,7 +106,6 @@ public:
   {
     return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
   }
-  Map AllocateForTextureUpload(size_t amt);
   Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
   {
     Preallocate(buffer_idx, amt);
@@ -119,7 +119,6 @@ public:
                      static_cast<size_t>(align)) == 0);
     return CommitPreallocation(buffer_idx, Align(amt, align));
   }
 
-  id<MTLBlitCommandEncoder> GetUploadEncoder();
   id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
   id<MTLCommandBuffer> GetRenderCmdBuf();
@@ -143,28 +142,18 @@ private:
     void Reset(size_t new_size);
   };
 
-  struct CPUBuffer
+  struct Buffer
   {
     UsageTracker usage;
     MRCOwned<id<MTLBuffer>> mtlbuffer;
     void* buffer = nullptr;
   };
 
-  struct BufferPair
-  {
-    UsageTracker usage;
-    MRCOwned<id<MTLBuffer>> cpubuffer;
-    MRCOwned<id<MTLBuffer>> gpubuffer;
-    void* buffer = nullptr;
-    size_t last_upload = 0;
-  };
-
   struct Backref;
   struct PerfQueryTracker;
 
   std::shared_ptr<Backref> m_backref;
   std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
-  MRCOwned<id<MTLFence>> m_fence;
   MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
   MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
   MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
@@ -176,8 +165,7 @@ private:
   MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
   MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
   Framebuffer* m_current_framebuffer;
-  CPUBuffer m_texture_upload_buffer;
-  BufferPair m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
+  Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
 
   u64 m_current_draw = 1;
   std::atomic<u64> m_last_finished_draw{0};
@@ -264,7 +252,6 @@ private:
   std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
   void SetSamplerForce(u32 idx, const SamplerState& sampler);
-  void Sync(BufferPair& buffer);
   Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
   void CheckViewport();
   void CheckScissor();

Source/Core/VideoBackends/Metal/MTLStateTracker.mm

@@ -44,11 +44,12 @@ static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
   // clang-format off
   switch (buffer)
   {
+  case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data";
   case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
   case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
   case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
   case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms";
   case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload";
   }
   // clang-format on
 }
@@ -103,7 +104,6 @@ void Metal::StateTracker::UsageTracker::Reset(size_t new_size)
 Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
 {
   m_flags.should_apply_label = true;
-  m_fence = MRCTransfer([g_device newFence]);
   for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc)
   {
     rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
@@ -140,10 +140,9 @@ Metal::StateTracker::~StateTracker()
 // MARK: BufferPair Ops
 
-Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt)
+std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
 {
-  amt = (amt + 15) & ~15ull;
-  CPUBuffer& buffer = m_texture_upload_buffer;
+  Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
   u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
   bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
   if (__builtin_expect(needs_new, false))
@@ -155,61 +154,11 @@ Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t am
     MTLResourceOptions options =
         MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
     buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
-    [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"];
+    [buffer.mtlbuffer setLabel:GetName(buffer_idx)];
     ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
     buffer.buffer = [buffer.mtlbuffer contents];
     buffer.usage.Reset(newsize);
   }
-  size_t pos = buffer.usage.Allocate(m_current_draw, amt);
-  Map ret = {buffer.mtlbuffer, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
-  DEBUG_ASSERT(pos <= buffer.usage.Size() &&
-               "Previous code should have guaranteed there was enough space");
-  return ret;
-}
-
-std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
-{
-  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
-  u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
-  size_t base_pos = buffer.usage.Pos();
-  bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
-  bool needs_upload = needs_new || buffer.usage.Pos() == 0;
-  if (!g_features.unified_memory && needs_upload)
-  {
-    if (base_pos != buffer.last_upload)
-    {
-      id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
-      [encoder copyFromBuffer:buffer.cpubuffer
-                 sourceOffset:buffer.last_upload
-                     toBuffer:buffer.gpubuffer
-            destinationOffset:buffer.last_upload
-                         size:base_pos - buffer.last_upload];
-    }
-    buffer.last_upload = 0;
-  }
-  if (__builtin_expect(needs_new, false))
-  {
-    // Orphan buffer
-    size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
-    while (newsize < amt)
-      newsize *= 2;
-    MTLResourceOptions options =
-        MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
-    buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
-    [buffer.cpubuffer setLabel:GetName(buffer_idx)];
-    ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
-    buffer.buffer = [buffer.cpubuffer contents];
-    buffer.usage.Reset(newsize);
-    if (!g_features.unified_memory)
-    {
-      options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
-      buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
-      [buffer.gpubuffer setLabel:GetName(buffer_idx)];
-      ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
-    }
-  }
   size_t pos = buffer.usage.Pos();
   return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
 }
@@ -217,46 +166,17 @@ std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_id
 Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
                                                                   size_t amt)
 {
-  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
+  Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
   size_t pos = buffer.usage.Allocate(m_current_draw, amt);
   Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
-  ret.gpu_buffer = g_features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer;
+  ret.gpu_buffer = buffer.mtlbuffer;
   DEBUG_ASSERT(pos <= buffer.usage.Size() &&
                "Previous code should have guaranteed there was enough space");
   return ret;
 }
 
-void Metal::StateTracker::Sync(BufferPair& buffer)
-{
-  if (g_features.unified_memory || buffer.usage.Pos() == buffer.last_upload)
-    return;
-  id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
-  [encoder copyFromBuffer:buffer.cpubuffer
-             sourceOffset:buffer.last_upload
-                 toBuffer:buffer.gpubuffer
-        destinationOffset:buffer.last_upload
-                     size:buffer.usage.Pos() - buffer.last_upload];
-  buffer.last_upload = buffer.usage.Pos();
-}
-
 // MARK: Render Pass / Encoder Management
 
-id<MTLBlitCommandEncoder> Metal::StateTracker::GetUploadEncoder()
-{
-  if (!m_upload_cmdbuf)
-  {
-    @autoreleasepool
-    {
-      m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
-      [m_upload_cmdbuf setLabel:@"Vertex Upload"];
-      m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]);
-      [m_upload_encoder setLabel:@"Vertex Upload"];
-    }
-  }
-  return m_upload_encoder;
-}
-
 id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
 {
   if (!m_texture_upload_cmdbuf)
@@ -349,8 +269,6 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
       MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
   if (m_current_perf_query)
     [descriptor setVisibilityResultBuffer:nil];
-  if (!g_features.unified_memory)
-    [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
   AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
   if (!attachment)
     attachment = m_current_framebuffer->GetDepthAttachment();
@@ -380,8 +298,6 @@ void Metal::StateTracker::BeginComputePass()
   EndRenderPass();
   m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
   [m_current_compute_encoder setLabel:@"Compute"];
-  if (!g_features.unified_memory)
-    [m_current_compute_encoder waitForFence:m_fence];
   m_flags.NewEncoder();
   m_dirty_samplers = 0xff;
   m_dirty_textures = 0xff;
@@ -409,20 +325,6 @@ void Metal::StateTracker::FlushEncoders()
   if (!m_current_render_cmdbuf)
     return;
   EndRenderPass();
-  for (int i = 0; i <= static_cast<int>(UploadBuffer::Last); ++i)
-    Sync(m_upload_buffers[i]);
-  if (g_features.unified_memory)
-  {
-    ASSERT(!m_upload_cmdbuf && "Should never be used!");
-  }
-  else if (m_upload_cmdbuf)
-  {
-    [m_upload_encoder updateFence:m_fence];
-    [m_upload_encoder endEncoding];
-    [m_upload_cmdbuf commit];
-    m_upload_encoder = nullptr;
-    m_upload_cmdbuf = nullptr;
-  }
   if (m_texture_upload_cmdbuf)
   {
     [m_texture_upload_encoder endEncoding];

Source/Core/VideoBackends/Metal/MTLTexture.mm

@@ -59,7 +59,8 @@ void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
   const u32 num_rows = Common::AlignUp(height, block_size) / block_size;
   const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length);
   const u32 upload_size = source_pitch * num_rows;
-  StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size);
+  StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData,
+                                                    upload_size, StateTracker::AlignMask::Other);
   memcpy(map.cpu_buffer, buffer, upload_size);
   id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder();
   [encoder copyFromBuffer:map.gpu_buffer

Source/Core/VideoBackends/Metal/MTLUtil.h

@@ -16,7 +16,6 @@ namespace Metal
 {
 struct DeviceFeatures
 {
-  bool unified_memory;
   bool subgroup_ops;
 };

Source/Core/VideoBackends/Metal/MTLUtil.mm

@@ -211,14 +211,6 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice>
       config->backend_info.AAModes.push_back(i);
   }
 
-  // The unified memory path (using shared buffers for everything) performs noticeably better with
-  // bbox even on discrete GPUs (20fps vs 15fps in Super Paper Mario elevator), so default to that.
-  // The separate buffer + manual upload path is left available for testing and comparison.
-  if (char* env = getenv("MTL_UNIFIED_MEMORY"))
-    g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y';
-  else
-    g_features.unified_memory = true;
   g_features.subgroup_ops = false;
   if (@available(macOS 10.15, iOS 13, *))
   {