MetalDevice: Avoid inline texture uploads when unnecessary

This commit is contained in:
Stenzek 2023-08-24 19:00:01 +10:00
parent 1b9e72e3a6
commit 8a0033dfb3
3 changed files with 39 additions and 20 deletions

View File

@ -121,12 +121,19 @@ public:
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
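// Call whenever the texture is used by the current command buffer: bound to the pipeline, attached as a render target or depth buffer, or used as the source or destination of a copy.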
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
private: private:
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
Format format); Format format);
id<MTLTexture> m_texture; id<MTLTexture> m_texture;
// Contains the fence counter when the texture was last used.
// When this matches the current fence counter, the texture has already been used in the current command buffer, and Update() requests the inline blit encoder for its upload.
u64 m_use_fence_counter = 0;
u16 m_map_x = 0; u16 m_map_x = 0;
u16 m_map_y = 0; u16 m_map_y = 0;
u16 m_map_width = 0; u16 m_map_width = 0;
@ -179,9 +186,9 @@ class MetalDevice final : public GPUDevice
{ {
public: public:
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); } ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; } ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; }
ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; } ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; }
ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; } ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; }
MetalDevice(); MetalDevice();
~MetalDevice(); ~MetalDevice();

View File

@ -130,7 +130,7 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
id<MTLDevice> device = nil; id<MTLDevice> device = nil;
if (!adapter.empty()) if (!adapter.empty())
{ {
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease]; NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]); const u32 count = static_cast<u32>([devices count]);
for (u32 i = 0; i < count; i++) for (u32 i = 0; i < count; i++)
{ {
@ -140,11 +140,11 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
break; break;
} }
} }
if (device == nil) if (device == nil)
Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str()); Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str());
} }
if (device == nil) if (device == nil)
{ {
device = [MTLCreateSystemDefaultDevice() autorelease]; device = [MTLCreateSystemDefaultDevice() autorelease];
@ -416,14 +416,15 @@ void MetalDevice::DestroyBuffers()
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList() GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
{ {
AdapterAndModeList ret; AdapterAndModeList ret;
@autoreleasepool { @autoreleasepool
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease]; {
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]); const u32 count = static_cast<u32>([devices count]);
ret.adapter_names.reserve(count); ret.adapter_names.reserve(count);
for (u32 i = 0; i < count; i++) for (u32 i = 0; i < count; i++)
ret.adapter_names.emplace_back([devices[i].name UTF8String]); ret.adapter_names.emplace_back([devices[i].name UTF8String]);
} }
return ret; return ret;
} }
@ -914,7 +915,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
Panic("Failed to allocate temporary buffer."); Panic("Failed to allocate temporary buffer.");
return false; return false;
} }
dev.DeferRelease(actual_buffer); dev.DeferRelease(actual_buffer);
} }
else else
@ -928,19 +929,19 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
return false; return false;
} }
} }
actual_offset = sb.GetCurrentOffset(); actual_offset = sb.GetCurrentOffset();
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
sb.CommitMemory(req_size); sb.CommitMemory(req_size);
actual_buffer = sb.GetBuffer(); actual_buffer = sb.GetBuffer();
actual_pitch = aligned_pitch; actual_pitch = aligned_pitch;
} }
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
dev.CommitClear(this); dev.CommitClear(this);
// TODO: track this const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter());
const bool is_inline = true;
id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline); id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
[encoder copyFromBuffer:actual_buffer [encoder copyFromBuffer:actual_buffer
sourceOffset:actual_offset sourceOffset:actual_offset
@ -1423,6 +1424,9 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d
CommitClear(S); CommitClear(S);
S->SetUseFenceCounter(m_current_fence_counter);
D->SetUseFenceCounter(m_current_fence_counter);
@autoreleasepool @autoreleasepool
{ {
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true); id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
@ -1743,7 +1747,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
DebugAssert(slot < MAX_TEXTURE_SAMPLERS); DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil; id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil; if (texture)
static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter);
if (m_current_textures[slot] != T) if (m_current_textures[slot] != T)
{ {
@ -1752,6 +1757,7 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
[m_render_encoder setFragmentTexture:T atIndex:slot]; [m_render_encoder setFragmentTexture:T atIndex:slot];
} }
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
if (m_current_samplers[slot] != S) if (m_current_samplers[slot] != S)
{ {
m_current_samplers[slot] = S; m_current_samplers[slot] = S;
@ -1829,7 +1835,8 @@ void MetalDevice::BeginRenderPass()
m_inline_upload_encoder = nil; m_inline_upload_encoder = nil;
} }
@autoreleasepool { @autoreleasepool
{
MTLRenderPassDescriptor* desc; MTLRenderPassDescriptor* desc;
if (!m_current_framebuffer) if (!m_current_framebuffer)
{ {
@ -1841,6 +1848,10 @@ void MetalDevice::BeginRenderPass()
else else
{ {
desc = m_current_framebuffer->GetDescriptor(); desc = m_current_framebuffer->GetDescriptor();
if (MetalTexture* RT = static_cast<MetalTexture*>(m_current_framebuffer->GetRT()))
RT->SetUseFenceCounter(m_current_fence_counter);
if (MetalTexture* DS = static_cast<MetalTexture*>(m_current_framebuffer->GetDS()))
DS->SetUseFenceCounter(m_current_fence_counter);
} }
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain]; m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];

View File

@ -138,7 +138,7 @@ void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
void MetalStreamBuffer::UpdateCurrentFencePosition() void MetalStreamBuffer::UpdateCurrentFencePosition()
{ {
// Has the offset changed since the last fence? // Has the offset changed since the last fence?
const u64 counter = MetalDevice::GetCurrentFenceCounter(); const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
{ {
// Still haven't executed a command buffer, so just update the offset. // Still haven't executed a command buffer, so just update the offset.
@ -155,7 +155,7 @@ void MetalStreamBuffer::UpdateGPUPosition()
auto start = m_tracked_fences.begin(); auto start = m_tracked_fences.begin();
auto end = start; auto end = start;
const u64 completed_counter = MetalDevice::GetCompletedFenceCounter(); const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();
while (end != m_tracked_fences.end() && completed_counter >= end->first) while (end != m_tracked_fences.end() && completed_counter >= end->first)
{ {
m_current_gpu_position = end->second; m_current_gpu_position = end->second;
@ -242,11 +242,12 @@ bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
// Did any fences satisfy this condition? // Did any fences satisfy this condition?
// Has the command buffer been executed yet? If not, the caller should execute it. // Has the command buffer been executed yet? If not, the caller should execute it.
if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter()) MetalDevice& dev = MetalDevice::GetInstance();
if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter())
return false; return false;
// Wait until this fence is signaled. This will fire the callback, updating the GPU position. // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
MetalDevice::GetInstance().WaitForFenceCounter(iter->first); dev.WaitForFenceCounter(iter->first);
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset; m_current_offset = new_offset;
m_current_space = new_space; m_current_space = new_space;