MetalDevice: Avoid inline texture uploads when unnecessary
This commit is contained in:
parent
1b9e72e3a6
commit
8a0033dfb3
|
@ -121,12 +121,19 @@ public:
|
||||||
|
|
||||||
void SetDebugName(const std::string_view& name) override;
|
void SetDebugName(const std::string_view& name) override;
|
||||||
|
|
||||||
|
// Call when the texture is bound to the pipeline, or read from in a copy.
|
||||||
|
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
|
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
|
||||||
Format format);
|
Format format);
|
||||||
|
|
||||||
id<MTLTexture> m_texture;
|
id<MTLTexture> m_texture;
|
||||||
|
|
||||||
|
// Contains the fence counter when the texture was last used.
|
||||||
|
// When this matches the current fence counter, the texture was used this command buffer.
|
||||||
|
u64 m_use_fence_counter = 0;
|
||||||
|
|
||||||
u16 m_map_x = 0;
|
u16 m_map_x = 0;
|
||||||
u16 m_map_y = 0;
|
u16 m_map_y = 0;
|
||||||
u16 m_map_width = 0;
|
u16 m_map_width = 0;
|
||||||
|
@ -179,9 +186,9 @@ class MetalDevice final : public GPUDevice
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
|
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
|
||||||
ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; }
|
ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; }
|
||||||
ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; }
|
ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; }
|
||||||
ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; }
|
ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; }
|
||||||
|
|
||||||
MetalDevice();
|
MetalDevice();
|
||||||
~MetalDevice();
|
~MetalDevice();
|
||||||
|
|
|
@ -130,7 +130,7 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
|
||||||
id<MTLDevice> device = nil;
|
id<MTLDevice> device = nil;
|
||||||
if (!adapter.empty())
|
if (!adapter.empty())
|
||||||
{
|
{
|
||||||
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
|
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
|
||||||
const u32 count = static_cast<u32>([devices count]);
|
const u32 count = static_cast<u32>([devices count]);
|
||||||
for (u32 i = 0; i < count; i++)
|
for (u32 i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
|
@ -140,11 +140,11 @@ bool MetalDevice::CreateDevice(const std::string_view& adapter, bool threaded_pr
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device == nil)
|
if (device == nil)
|
||||||
Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str());
|
Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device == nil)
|
if (device == nil)
|
||||||
{
|
{
|
||||||
device = [MTLCreateSystemDefaultDevice() autorelease];
|
device = [MTLCreateSystemDefaultDevice() autorelease];
|
||||||
|
@ -416,14 +416,15 @@ void MetalDevice::DestroyBuffers()
|
||||||
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
|
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
|
||||||
{
|
{
|
||||||
AdapterAndModeList ret;
|
AdapterAndModeList ret;
|
||||||
@autoreleasepool {
|
@autoreleasepool
|
||||||
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
|
{
|
||||||
|
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
|
||||||
const u32 count = static_cast<u32>([devices count]);
|
const u32 count = static_cast<u32>([devices count]);
|
||||||
ret.adapter_names.reserve(count);
|
ret.adapter_names.reserve(count);
|
||||||
for (u32 i = 0; i < count; i++)
|
for (u32 i = 0; i < count; i++)
|
||||||
ret.adapter_names.emplace_back([devices[i].name UTF8String]);
|
ret.adapter_names.emplace_back([devices[i].name UTF8String]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -914,7 +915,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
|
||||||
Panic("Failed to allocate temporary buffer.");
|
Panic("Failed to allocate temporary buffer.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
dev.DeferRelease(actual_buffer);
|
dev.DeferRelease(actual_buffer);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -928,19 +929,19 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
actual_offset = sb.GetCurrentOffset();
|
actual_offset = sb.GetCurrentOffset();
|
||||||
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
|
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
|
||||||
sb.CommitMemory(req_size);
|
sb.CommitMemory(req_size);
|
||||||
actual_buffer = sb.GetBuffer();
|
actual_buffer = sb.GetBuffer();
|
||||||
actual_pitch = aligned_pitch;
|
actual_pitch = aligned_pitch;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
|
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
|
||||||
dev.CommitClear(this);
|
dev.CommitClear(this);
|
||||||
|
|
||||||
// TODO: track this
|
const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter());
|
||||||
const bool is_inline = true;
|
|
||||||
id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
|
id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
|
||||||
[encoder copyFromBuffer:actual_buffer
|
[encoder copyFromBuffer:actual_buffer
|
||||||
sourceOffset:actual_offset
|
sourceOffset:actual_offset
|
||||||
|
@ -1423,6 +1424,9 @@ void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d
|
||||||
|
|
||||||
CommitClear(S);
|
CommitClear(S);
|
||||||
|
|
||||||
|
S->SetUseFenceCounter(m_current_fence_counter);
|
||||||
|
D->SetUseFenceCounter(m_current_fence_counter);
|
||||||
|
|
||||||
@autoreleasepool
|
@autoreleasepool
|
||||||
{
|
{
|
||||||
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
|
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
|
||||||
|
@ -1743,7 +1747,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||||
DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
|
DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
|
||||||
|
|
||||||
id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
|
id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
|
||||||
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
|
if (texture)
|
||||||
|
static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter);
|
||||||
|
|
||||||
if (m_current_textures[slot] != T)
|
if (m_current_textures[slot] != T)
|
||||||
{
|
{
|
||||||
|
@ -1752,6 +1757,7 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||||
[m_render_encoder setFragmentTexture:T atIndex:slot];
|
[m_render_encoder setFragmentTexture:T atIndex:slot];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
|
||||||
if (m_current_samplers[slot] != S)
|
if (m_current_samplers[slot] != S)
|
||||||
{
|
{
|
||||||
m_current_samplers[slot] = S;
|
m_current_samplers[slot] = S;
|
||||||
|
@ -1829,7 +1835,8 @@ void MetalDevice::BeginRenderPass()
|
||||||
m_inline_upload_encoder = nil;
|
m_inline_upload_encoder = nil;
|
||||||
}
|
}
|
||||||
|
|
||||||
@autoreleasepool {
|
@autoreleasepool
|
||||||
|
{
|
||||||
MTLRenderPassDescriptor* desc;
|
MTLRenderPassDescriptor* desc;
|
||||||
if (!m_current_framebuffer)
|
if (!m_current_framebuffer)
|
||||||
{
|
{
|
||||||
|
@ -1841,6 +1848,10 @@ void MetalDevice::BeginRenderPass()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
desc = m_current_framebuffer->GetDescriptor();
|
desc = m_current_framebuffer->GetDescriptor();
|
||||||
|
if (MetalTexture* RT = static_cast<MetalTexture*>(m_current_framebuffer->GetRT()))
|
||||||
|
RT->SetUseFenceCounter(m_current_fence_counter);
|
||||||
|
if (MetalTexture* DS = static_cast<MetalTexture*>(m_current_framebuffer->GetDS()))
|
||||||
|
DS->SetUseFenceCounter(m_current_fence_counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
|
m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
|
||||||
|
|
|
@ -138,7 +138,7 @@ void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
|
||||||
void MetalStreamBuffer::UpdateCurrentFencePosition()
|
void MetalStreamBuffer::UpdateCurrentFencePosition()
|
||||||
{
|
{
|
||||||
// Has the offset changed since the last fence?
|
// Has the offset changed since the last fence?
|
||||||
const u64 counter = MetalDevice::GetCurrentFenceCounter();
|
const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter();
|
||||||
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
|
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
|
||||||
{
|
{
|
||||||
// Still haven't executed a command buffer, so just update the offset.
|
// Still haven't executed a command buffer, so just update the offset.
|
||||||
|
@ -155,7 +155,7 @@ void MetalStreamBuffer::UpdateGPUPosition()
|
||||||
auto start = m_tracked_fences.begin();
|
auto start = m_tracked_fences.begin();
|
||||||
auto end = start;
|
auto end = start;
|
||||||
|
|
||||||
const u64 completed_counter = MetalDevice::GetCompletedFenceCounter();
|
const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();
|
||||||
while (end != m_tracked_fences.end() && completed_counter >= end->first)
|
while (end != m_tracked_fences.end() && completed_counter >= end->first)
|
||||||
{
|
{
|
||||||
m_current_gpu_position = end->second;
|
m_current_gpu_position = end->second;
|
||||||
|
@ -242,11 +242,12 @@ bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
|
||||||
|
|
||||||
// Did any fences satisfy this condition?
|
// Did any fences satisfy this condition?
|
||||||
// Has the command buffer been executed yet? If not, the caller should execute it.
|
// Has the command buffer been executed yet? If not, the caller should execute it.
|
||||||
if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter())
|
MetalDevice& dev = MetalDevice::GetInstance();
|
||||||
|
if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Wait until this fence is signaled. This will fire the callback, updating the GPU position.
|
// Wait until this fence is signaled. This will fire the callback, updating the GPU position.
|
||||||
MetalDevice::GetInstance().WaitForFenceCounter(iter->first);
|
dev.WaitForFenceCounter(iter->first);
|
||||||
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
|
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
|
||||||
m_current_offset = new_offset;
|
m_current_offset = new_offset;
|
||||||
m_current_space = new_space;
|
m_current_space = new_space;
|
||||||
|
|
Loading…
Reference in New Issue