VideoBackends:Metal: Bring back unified memory config
Turns out it was helpful (most improvement in ubershaders). This time with a much better auto mode.
parent 93ce7bf344
commit c08de82e90
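In short: the streaming buffers become CPU/GPU pairs, and vertex/index/uniform data is blit-copied into a private GPU buffer instead of being read by the GPU straight out of shared system memory. The Auto mode only turns this on for GPUs without unified memory. Below is a minimal standalone sketch of that decision, mirroring the PopulateBackendInfoFeatures hunk later in this diff; the helper name and free-standing form are illustrative only, not part of the commit.

#import <Metal/Metal.h>
#include <TargetConditionals.h>

// TriState comes from the VideoConfig.h hunk below: Off, On, Auto.
static bool ShouldManuallyUploadBuffers(id<MTLDevice> device, TriState setting)
{
  switch (setting)
  {
  case TriState::Off:
    return false;
  case TriState::On:
    return true;
  case TriState::Auto:
  default:
#if TARGET_OS_OSX
    // Discrete GPUs: stage uploads through a private MTLBuffer so the GPU
    // doesn't read vertex/index/uniform data out of shared memory over PCIe.
    if (@available(macOS 10.15, *))
      return ![device hasUnifiedMemory];
    return false;  // can't query unified memory before 10.15; keep shared buffers
#else
    return false;  // all iOS devices have unified memory
#endif
  }
}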
@@ -92,6 +92,9 @@ const Info<int> GFX_SHADER_PRECOMPILER_THREADS{
 const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
     {System::GFX, "Settings", "SaveTextureCacheToState"}, true};
 
+const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
+    {System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
+
 const Info<bool> GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false};
 const Info<bool> GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false};
 const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"},
@@ -11,6 +11,7 @@ enum class AspectMode : int;
 enum class ShaderCompilationMode : int;
 enum class StereoMode : int;
 enum class FreelookControlType : int;
+enum class TriState : int;
 
 namespace Config
 {
@@ -74,6 +75,8 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
 extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
 extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
 
+extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
+
 extern const Info<bool> GFX_SW_DUMP_OBJECTS;
 extern const Info<bool> GFX_SW_DUMP_TEV_STAGES;
 extern const Info<bool> GFX_SW_DUMP_TEV_TEX_FETCHES;
@@ -36,6 +36,7 @@ std::vector<BBoxType> Metal::BoundingBox::Read(u32 index, u32 length)
 {
   g_state_tracker->EndRenderPass();
   g_state_tracker->FlushEncoders();
+  g_state_tracker->NotifyOfCPUGPUSync();
   g_state_tracker->WaitForFlushedEncoders();
   return std::vector<BBoxType>(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length);
 }
@@ -56,6 +56,7 @@ void Metal::PerfQuery::FlushResults()
 
   // There's a possibility that some active performance queries are unflushed
   g_state_tracker->FlushEncoders();
+  g_state_tracker->NotifyOfCPUGPUSync();
 
   std::unique_lock<std::mutex> lock(m_results_mtx);
   while (!IsFlushed())
@@ -34,7 +34,6 @@ public:
     Uniform,
     Vertex,
     Index,
-    TextureData,
     Texels,
     Last = Texels
   };
@@ -75,6 +74,14 @@ public:
     return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire);
   }
   void ReloadSamplers();
+  void NotifyOfCPUGPUSync()
+  {
+    if (!g_features.manual_buffer_upload || !m_manual_buffer_upload)
+      return;
+    if (m_upload_cmdbuf || m_current_render_cmdbuf)
+      return;
+    SetManualBufferUpload(false);
+  }
 
   void SetPipeline(const Pipeline* pipe);
   void SetPipeline(const ComputePipeline* pipe);
@@ -106,6 +113,7 @@ public:
   {
     return (amt + static_cast<size_t>(align)) & ~static_cast<size_t>(align);
   }
+  Map AllocateForTextureUpload(size_t amt);
   Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align)
   {
     Preallocate(buffer_idx, amt);
@@ -119,6 +127,7 @@ public:
                  static_cast<size_t>(align)) == 0);
     return CommitPreallocation(buffer_idx, Align(amt, align));
   }
+  id<MTLBlitCommandEncoder> GetUploadEncoder();
   id<MTLBlitCommandEncoder> GetTextureUploadEncoder();
   id<MTLCommandBuffer> GetRenderCmdBuf();
 
@@ -142,18 +151,28 @@ private:
     void Reset(size_t new_size);
   };
 
-  struct Buffer
+  struct CPUBuffer
   {
     UsageTracker usage;
     MRCOwned<id<MTLBuffer>> mtlbuffer;
     void* buffer = nullptr;
   };
 
+  struct BufferPair
+  {
+    UsageTracker usage;
+    MRCOwned<id<MTLBuffer>> cpubuffer;
+    MRCOwned<id<MTLBuffer>> gpubuffer;
+    void* buffer = nullptr;
+    size_t last_upload = 0;
+  };
+
   struct Backref;
   struct PerfQueryTracker;
 
   std::shared_ptr<Backref> m_backref;
   std::vector<std::shared_ptr<PerfQueryTracker>> m_perf_query_tracker_cache;
+  MRCOwned<id<MTLFence>> m_fence;
   MRCOwned<id<MTLCommandBuffer>> m_upload_cmdbuf;
   MRCOwned<id<MTLBlitCommandEncoder>> m_upload_encoder;
   MRCOwned<id<MTLCommandBuffer>> m_texture_upload_cmdbuf;
@@ -165,7 +184,8 @@ private:
   MRCOwned<MTLRenderPassDescriptor*> m_render_pass_desc[3];
   MRCOwned<MTLRenderPassDescriptor*> m_resolve_pass_desc;
   Framebuffer* m_current_framebuffer;
-  Buffer m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
+  CPUBuffer m_texture_upload_buffer;
+  BufferPair m_upload_buffers[static_cast<int>(UploadBuffer::Last) + 1];
   u64 m_current_draw = 1;
   std::atomic<u64> m_last_finished_draw{0};
 
@@ -249,9 +269,12 @@ private:
   } m_state;
 
   u32 m_perf_query_tracker_counter = 0;
+  bool m_manual_buffer_upload = false;
 
+  void SetManualBufferUpload(bool enable);
   std::shared_ptr<PerfQueryTracker> NewPerfQueryTracker();
   void SetSamplerForce(u32 idx, const SamplerState& sampler);
+  void Sync(BufferPair& buffer);
   Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt);
   void CheckViewport();
   void CheckScissor();
@@ -44,7 +44,6 @@ static NSString* GetName(Metal::StateTracker::UploadBuffer buffer)
   // clang-format off
   switch (buffer)
   {
-  case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data";
   case Metal::StateTracker::UploadBuffer::Texels: return @"Texels";
   case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices";
   case Metal::StateTracker::UploadBuffer::Index: return @"Indices";
@@ -104,6 +103,7 @@ void Metal::StateTracker::UsageTracker::Reset(size_t new_size)
 Metal::StateTracker::StateTracker() : m_backref(std::make_shared<Backref>(this))
 {
   m_flags.should_apply_label = true;
+  m_fence = MRCTransfer([g_device newFence]);
   for (MRCOwned<MTLRenderPassDescriptor*>& rpdesc : m_render_pass_desc)
   {
     rpdesc = MRCTransfer([MTLRenderPassDescriptor new]);
@@ -140,9 +140,10 @@ Metal::StateTracker::~StateTracker()
 
 // MARK: BufferPair Ops
 
-std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
+Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt)
 {
-  Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
+  amt = (amt + 15) & ~15ull;
+  CPUBuffer& buffer = m_texture_upload_buffer;
   u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
   bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
   if (__builtin_expect(needs_new, false))
@@ -154,11 +155,61 @@ std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_id
     MTLResourceOptions options =
         MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
     buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
-    [buffer.mtlbuffer setLabel:GetName(buffer_idx)];
+    [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"];
     ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)");
     buffer.buffer = [buffer.mtlbuffer contents];
     buffer.usage.Reset(newsize);
   }
+
+  size_t pos = buffer.usage.Allocate(m_current_draw, amt);
+
+  Map ret = {buffer.mtlbuffer, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
+  DEBUG_ASSERT(pos <= buffer.usage.Size() &&
+               "Previous code should have guaranteed there was enough space");
+  return ret;
+}
+
+std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt)
+{
+  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
+  u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire);
+  size_t base_pos = buffer.usage.Pos();
+  bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt);
+  bool needs_upload = needs_new || buffer.usage.Pos() == 0;
+  if (m_manual_buffer_upload && needs_upload)
+  {
+    if (base_pos != buffer.last_upload)
+    {
+      id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
+      [encoder copyFromBuffer:buffer.cpubuffer
+                 sourceOffset:buffer.last_upload
+                     toBuffer:buffer.gpubuffer
+            destinationOffset:buffer.last_upload
+                         size:base_pos - buffer.last_upload];
+    }
+    buffer.last_upload = 0;
+  }
+  if (__builtin_expect(needs_new, false))
+  {
+    // Orphan buffer
+    size_t newsize = std::max<size_t>(buffer.usage.Size() * 2, 4096);
+    while (newsize < amt)
+      newsize *= 2;
+    MTLResourceOptions options =
+        MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;
+    buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
+    [buffer.cpubuffer setLabel:GetName(buffer_idx)];
+    ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
+    buffer.buffer = [buffer.cpubuffer contents];
+    buffer.usage.Reset(newsize);
+    if (g_features.manual_buffer_upload)
+    {
+      options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked;
+      buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]);
+      [buffer.gpubuffer setLabel:GetName(buffer_idx)];
+      ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)");
+    }
+  }
   size_t pos = buffer.usage.Pos();
   return std::make_pair(reinterpret_cast<char*>(buffer.buffer) + pos, pos);
 }
@@ -166,17 +217,46 @@ std::pair<void*, size_t> Metal::StateTracker::Preallocate(UploadBuffer buffer_id
 Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx,
                                                                   size_t amt)
 {
-  Buffer& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
+  BufferPair& buffer = m_upload_buffers[static_cast<int>(buffer_idx)];
   size_t pos = buffer.usage.Allocate(m_current_draw, amt);
   Map ret = {nil, pos, reinterpret_cast<char*>(buffer.buffer) + pos};
-  ret.gpu_buffer = buffer.mtlbuffer;
+  ret.gpu_buffer = m_manual_buffer_upload ? buffer.gpubuffer : buffer.cpubuffer;
   DEBUG_ASSERT(pos <= buffer.usage.Size() &&
               "Previous code should have guaranteed there was enough space");
   return ret;
 }
 
+void Metal::StateTracker::Sync(BufferPair& buffer)
+{
+  if (!m_manual_buffer_upload || buffer.usage.Pos() == buffer.last_upload)
+    return;
+
+  id<MTLBlitCommandEncoder> encoder = GetUploadEncoder();
+  [encoder copyFromBuffer:buffer.cpubuffer
+             sourceOffset:buffer.last_upload
+                 toBuffer:buffer.gpubuffer
+        destinationOffset:buffer.last_upload
+                     size:buffer.usage.Pos() - buffer.last_upload];
+  buffer.last_upload = buffer.usage.Pos();
+}
+
 // MARK: Render Pass / Encoder Management
 
+id<MTLBlitCommandEncoder> Metal::StateTracker::GetUploadEncoder()
+{
+  if (!m_upload_cmdbuf)
+  {
+    @autoreleasepool
+    {
+      m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]);
+      [m_upload_cmdbuf setLabel:@"Vertex Upload"];
+      m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]);
+      [m_upload_encoder setLabel:@"Vertex Upload"];
+    }
+  }
+  return m_upload_encoder;
+}
+
 id<MTLBlitCommandEncoder> Metal::StateTracker::GetTextureUploadEncoder()
 {
   if (!m_texture_upload_cmdbuf)
@@ -269,6 +349,8 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor)
       MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]);
   if (m_current_perf_query)
     [descriptor setVisibilityResultBuffer:nil];
+  if (m_manual_buffer_upload)
+    [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex];
   AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment();
   if (!attachment)
     attachment = m_current_framebuffer->GetDepthAttachment();
@@ -298,6 +380,8 @@ void Metal::StateTracker::BeginComputePass()
   EndRenderPass();
   m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]);
   [m_current_compute_encoder setLabel:@"Compute"];
+  if (m_manual_buffer_upload)
+    [m_current_compute_encoder waitForFence:m_fence];
   m_flags.NewEncoder();
   m_dirty_samplers = 0xff;
   m_dirty_textures = 0xff;
@@ -325,6 +409,20 @@ void Metal::StateTracker::FlushEncoders()
   if (!m_current_render_cmdbuf)
     return;
   EndRenderPass();
+  for (int i = 0; i <= static_cast<int>(UploadBuffer::Last); ++i)
+    Sync(m_upload_buffers[i]);
+  if (!m_manual_buffer_upload)
+  {
+    ASSERT(!m_upload_cmdbuf && "Should never be used!");
+  }
+  else if (m_upload_cmdbuf)
+  {
+    [m_upload_encoder updateFence:m_fence];
+    [m_upload_encoder endEncoding];
+    [m_upload_cmdbuf commit];
+    m_upload_encoder = nullptr;
+    m_upload_cmdbuf = nullptr;
+  }
   if (m_texture_upload_cmdbuf)
   {
     [m_texture_upload_encoder endEncoding];
@@ -354,6 +452,8 @@ void Metal::StateTracker::FlushEncoders()
   m_last_render_cmdbuf = std::move(m_current_render_cmdbuf);
   m_current_render_cmdbuf = nullptr;
   m_current_draw++;
+  if (g_features.manual_buffer_upload && !m_manual_buffer_upload)
+    SetManualBufferUpload(true);
 }
 
 void Metal::StateTracker::WaitForFlushedEncoders()
@@ -367,6 +467,23 @@ void Metal::StateTracker::ReloadSamplers()
     m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]);
 }
 
+void Metal::StateTracker::SetManualBufferUpload(bool enabled)
+{
+  // When a game does something that needs CPU-GPU sync (e.g. bbox, texture download, etc),
+  // the next command buffer will be done with manual buffer upload disabled,
+  // since overlapping the upload with the previous draw won't be possible (due to sync).
+  // This greatly improves performance in heavy bbox games like Super Paper Mario.
+  m_manual_buffer_upload = enabled;
+  if (enabled)
+  {
+    for (BufferPair& buffer : m_upload_buffers)
+    {
+      // Update sync positions, since Sync doesn't do it when manual buffer upload is off
+      buffer.last_upload = buffer.usage.Pos();
+    }
+  }
+}
+
 // MARK: State Setters
 
 void Metal::StateTracker::SetPipeline(const Pipeline* pipe)
@@ -6,6 +6,7 @@
 #include "Common/Align.h"
 #include "Common/Assert.h"
 
+#include "VideoBackends/Metal/MTLRenderer.h"
 #include "VideoBackends/Metal/MTLStateTracker.h"
 
 Metal::Texture::Texture(MRCOwned<id<MTLTexture>> tex, const TextureConfig& config)
@@ -59,8 +60,7 @@ void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, //
   const u32 num_rows = Common::AlignUp(height, block_size) / block_size;
   const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length);
   const u32 upload_size = source_pitch * num_rows;
-  StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData,
-                                                    upload_size, StateTracker::AlignMask::Other);
+  StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size);
   memcpy(map.cpu_buffer, buffer, upload_size);
   id<MTLBlitCommandEncoder> encoder = g_state_tracker->GetTextureUploadEncoder();
   [encoder copyFromBuffer:map.gpu_buffer
@@ -163,6 +163,7 @@ void Metal::StagingTexture::Flush()
   {
     // Flush while we wait, since who knows how long we'll be sitting here
     g_state_tracker->FlushEncoders();
+    g_state_tracker->NotifyOfCPUGPUSync();
     [m_wait_buffer waitUntilCompleted];
   }
   m_wait_buffer = nullptr;
@@ -16,6 +16,10 @@ namespace Metal
 {
 struct DeviceFeatures
 {
+  /// Manually copy buffer data to the GPU (instead of letting the GPU read from system memory)
+  /// On discrete GPUs, this tends to be faster if the copy is able to operate in parallel with a
+  /// previous render. This is the case unless a game uses features like bbox or texture downloads.
+  bool manual_buffer_upload;
   bool subgroup_ops;
 };
 
@@ -216,6 +216,27 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id<MTLDevice>
       config->backend_info.AAModes.push_back(i);
   }
 
+  switch (config->iManuallyUploadBuffers)
+  {
+  case TriState::Off:
+    g_features.manual_buffer_upload = false;
+    break;
+  case TriState::On:
+    g_features.manual_buffer_upload = true;
+    break;
+  case TriState::Auto:
+#if TARGET_OS_OSX
+    g_features.manual_buffer_upload = false;
+    if (@available(macOS 10.15, *))
+      if (![device hasUnifiedMemory])
+        g_features.manual_buffer_upload = true;
+#else
+    // All iOS devices have unified memory
+    g_features.manual_buffer_upload = false;
+#endif
+    break;
+  }
+
   g_features.subgroup_ops = false;
   if (@available(macOS 10.15, iOS 13, *))
   {
@@ -55,6 +55,7 @@ void VideoConfig::Refresh()
 
   bVSync = Config::Get(Config::GFX_VSYNC);
   iAdapter = Config::Get(Config::GFX_ADAPTER);
+  iManuallyUploadBuffers = Config::Get(Config::GFX_MTL_MANUALLY_UPLOAD_BUFFERS);
 
   bWidescreenHack = Config::Get(Config::GFX_WIDESCREEN_HACK);
   aspect_mode = Config::Get(Config::GFX_ASPECT_RATIO);
@@ -45,6 +45,13 @@ enum class ShaderCompilationMode : int
   AsynchronousSkipRendering
 };
 
+enum class TriState : int
+{
+  Off,
+  On,
+  Auto
+};
+
 // NEVER inherit from this class.
 struct VideoConfig final
 {
@@ -148,6 +155,9 @@ struct VideoConfig final
   // D3D only config, mostly to be merged into the above
   int iAdapter = 0;
 
+  // Metal only config
+  TriState iManuallyUploadBuffers = TriState::Auto;
+
   // Enable API validation layers, currently only supported with Vulkan.
   bool bEnableValidationLayer = false;
 