Metal: Unsupported topology indexed draw conversion (#40)

* Convert unsupported indexed buffer topologies

* Fix index count and dispatch size

* Cleanup

* Fix typos
This commit is contained in:
Isaac Marovitz 2024-09-02 12:55:30 +01:00 committed by Isaac Marovitz
parent 4439e7dcdc
commit d30d23bd0f
No known key found for this signature in database
GPG Key ID: 97250B2B09A132E1
9 changed files with 256 additions and 106 deletions

View File

@ -318,6 +318,35 @@ namespace Ryujinx.Graphics.Metal
return holder.GetBuffer(); return holder.GetBuffer();
} }
public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, int offset, int size, IndexBufferPattern pattern, int indexSize)
{
if (!BoundToRange(offset, ref size))
{
return null;
}
var key = new TopologyConversionCacheKey(_renderer, pattern, indexSize);
if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder))
{
// The destination index size is always I32.
int indexCount = size / indexSize;
int convertedCount = pattern.GetConvertedCount(indexCount);
holder = _renderer.BufferManager.Create(convertedCount * 4);
_renderer.HelperShader.ConvertIndexBuffer(cbs, this, holder, pattern, indexSize, offset, indexCount);
key.SetBuffer(holder.GetBuffer());
_cachedConvertedBuffers.Add(offset, size, key, holder);
}
return holder.GetBuffer();
}
public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder) public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder)
{ {
return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder); return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder);

View File

@ -177,6 +177,16 @@ namespace Ryujinx.Graphics.Metal
return null; return null;
} }
public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, IndexBufferPattern pattern, int indexSize)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBufferTopologyConversion(cbs, offset, size, pattern, indexSize);
}
return null;
}
public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size) public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
{ {
if (TryGetBuffer(handle, out var holder)) if (TryGetBuffer(handle, out var holder))

View File

@ -39,80 +39,42 @@ namespace Ryujinx.Graphics.Metal
} }
} }
// [SupportedOSPlatform("macos")] [SupportedOSPlatform("macos")]
// struct AlignedVertexBufferCacheKey : ICacheKey struct TopologyConversionCacheKey : ICacheKey
// { {
// private readonly int _stride; private readonly IndexBufferPattern _pattern;
// private readonly int _alignment; private readonly int _indexSize;
//
// // Used to notify the pipeline that bindings have invalidated on dispose.
// // private readonly MetalRenderer _renderer;
// // private Auto<DisposableBuffer> _buffer;
//
// public AlignedVertexBufferCacheKey(MetalRenderer renderer, int stride, int alignment)
// {
// // _renderer = renderer;
// _stride = stride;
// _alignment = alignment;
// // _buffer = null;
// }
//
// public readonly bool KeyEqual(ICacheKey other)
// {
// return other is AlignedVertexBufferCacheKey entry &&
// entry._stride == _stride &&
// entry._alignment == _alignment;
// }
//
// public void SetBuffer(Auto<DisposableBuffer> buffer)
// {
// // _buffer = buffer;
// }
//
// public readonly void Dispose()
// {
// // TODO: Tell pipeline buffer is dirty!
// // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
// }
// }
// [SupportedOSPlatform("macos")] // Used to notify the pipeline that bindings have invalidated on dispose.
// struct TopologyConversionCacheKey : ICacheKey // private readonly MetalRenderer _renderer;
// { // private Auto<DisposableBuffer> _buffer;
// // TODO: Patterns
// // private readonly IndexBufferPattern _pattern; public TopologyConversionCacheKey(MetalRenderer renderer, IndexBufferPattern pattern, int indexSize)
// private readonly int _indexSize; {
// // _renderer = renderer;
// // Used to notify the pipeline that bindings have invalidated on dispose. // _buffer = null;
// // private readonly MetalRenderer _renderer; _pattern = pattern;
// // private Auto<DisposableBuffer> _buffer; _indexSize = indexSize;
// }
// public TopologyConversionCacheKey(MetalRenderer renderer, /*IndexBufferPattern pattern, */int indexSize)
// { public readonly bool KeyEqual(ICacheKey other)
// // _renderer = renderer; {
// // _pattern = pattern; return other is TopologyConversionCacheKey entry &&
// _indexSize = indexSize; entry._pattern == _pattern &&
// // _buffer = null; entry._indexSize == _indexSize;
// } }
//
// public readonly bool KeyEqual(ICacheKey other) public void SetBuffer(Auto<DisposableBuffer> buffer)
// { {
// return other is TopologyConversionCacheKey entry && // _buffer = buffer;
// // entry._pattern == _pattern && }
// entry._indexSize == _indexSize;
// } public readonly void Dispose()
// {
// public void SetBuffer(Auto<DisposableBuffer> buffer) // TODO: Tell pipeline buffer is dirty!
// { // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
// // _buffer = buffer; }
// } }
//
// public readonly void Dispose()
// {
// // TODO: Tell pipeline buffer is dirty!
// // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
// }
// }
[SupportedOSPlatform("macos")] [SupportedOSPlatform("macos")]
readonly struct Dependency readonly struct Dependency

View File

@ -33,6 +33,7 @@ namespace Ryujinx.Graphics.Metal
private readonly IProgram _programDepthStencilClear; private readonly IProgram _programDepthStencilClear;
private readonly IProgram _programStrideChange; private readonly IProgram _programStrideChange;
private readonly IProgram _programConvertD32S8ToD24S8; private readonly IProgram _programConvertD32S8ToD24S8;
private readonly IProgram _programConvertIndexBuffer;
private readonly IProgram _programDepthBlit; private readonly IProgram _programDepthBlit;
private readonly IProgram _programDepthBlitMs; private readonly IProgram _programDepthBlitMs;
private readonly IProgram _programStencilBlit; private readonly IProgram _programStencilBlit;
@ -163,6 +164,17 @@ namespace Ryujinx.Graphics.Metal
new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl) new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl)
], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1)); ], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1));
var convertIndexBufferLayout = new ResourceLayoutBuilder()
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true)
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 3).Build();
var convertIndexBufferSource = ReadMsl("ConvertIndexBuffer.metal");
_programConvertIndexBuffer = new Program(
[
new ShaderSource(convertIndexBufferSource, ShaderStage.Compute, TargetLanguage.Msl)
], convertIndexBufferLayout, device, new ComputeSize(16, 1, 1));
var depthBlitSource = ReadMsl("DepthBlit.metal"); var depthBlitSource = ReadMsl("DepthBlit.metal");
_programDepthBlit = new Program( _programDepthBlit = new Program(
[ [
@ -574,7 +586,7 @@ namespace Ryujinx.Graphics.Metal
var srcBuffer = src.GetBuffer(); var srcBuffer = src.GetBuffer();
var dstBuffer = dst.GetBuffer(); var dstBuffer = dst.GetBuffer();
const int ParamsBufferSize = 16; const int ParamsBufferSize = 4 * sizeof(int);
// Save current state // Save current state
_pipeline.SwapState(_helperShaderState); _pipeline.SwapState(_helperShaderState);
@ -636,6 +648,58 @@ namespace Ryujinx.Graphics.Metal
_pipeline.SwapState(null); _pipeline.SwapState(null);
} }
public void ConvertIndexBuffer(
CommandBufferScoped cbs,
BufferHolder src,
BufferHolder dst,
IndexBufferPattern pattern,
int indexSize,
int srcOffset,
int indexCount)
{
// TODO: Support conversion with primitive restart enabled.
int primitiveCount = pattern.GetPrimitiveCount(indexCount);
int outputIndexSize = 4;
var srcBuffer = src.GetBuffer();
var dstBuffer = dst.GetBuffer();
const int ParamsBufferSize = 16 * sizeof(int);
// Save current state
_pipeline.SwapState(_helperShaderState);
Span<int> shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)];
shaderParams[8] = pattern.PrimitiveVertices;
shaderParams[9] = pattern.PrimitiveVerticesOut;
shaderParams[10] = indexSize;
shaderParams[11] = outputIndexSize;
shaderParams[12] = pattern.BaseIndex;
shaderParams[13] = pattern.IndexStride;
shaderParams[14] = srcOffset;
shaderParams[15] = primitiveCount;
pattern.OffsetIndex.CopyTo(shaderParams[..pattern.OffsetIndex.Length]);
using var patternScoped = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
patternScoped.Holder.SetDataUnchecked<int>(patternScoped.Offset, shaderParams);
Span<Auto<DisposableBuffer>> sbRanges = new Auto<DisposableBuffer>[2];
sbRanges[0] = srcBuffer;
sbRanges[1] = dstBuffer;
_pipeline.SetStorageBuffers(1, sbRanges);
_pipeline.SetStorageBuffers([new BufferAssignment(3, patternScoped.Range)]);
_pipeline.SetProgram(_programConvertIndexBuffer);
_pipeline.DispatchCompute(BitUtils.DivRoundUp(primitiveCount, 16), 1, 1, "Convert Index Buffer");
// Restore previous state
_pipeline.SwapState(null);
}
public unsafe void ClearColor( public unsafe void ClearColor(
int index, int index,
ReadOnlySpan<float> clearColor, ReadOnlySpan<float> clearColor,

View File

@ -1,6 +1,5 @@
using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.GAL;
using System; using System;
using System.Collections.Generic;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Runtime.Versioning; using System.Runtime.Versioning;
@ -49,28 +48,6 @@ namespace Ryujinx.Graphics.Metal
return primitiveCount * OffsetIndex.Length; return primitiveCount * OffsetIndex.Length;
} }
public IEnumerable<int> GetIndexMapping(int indexCount)
{
int primitiveCount = GetPrimitiveCount(indexCount);
int index = BaseIndex;
for (int i = 0; i < primitiveCount; i++)
{
if (RepeatStart)
{
// Used for triangle fan
yield return 0;
}
for (int j = RepeatStart ? 1 : 0; j < OffsetIndex.Length; j++)
{
yield return index + OffsetIndex[j];
}
index += IndexStride;
}
}
public BufferHandle GetRepeatingBuffer(int vertexCount, out int indexCount) public BufferHandle GetRepeatingBuffer(int vertexCount, out int indexCount)
{ {
int primitiveCount = GetPrimitiveCount(vertexCount); int primitiveCount = GetPrimitiveCount(vertexCount);

View File

@ -62,5 +62,42 @@ namespace Ryujinx.Graphics.Metal
return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt16); return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt16);
} }
public (MTLBuffer, int, MTLIndexType) GetConvertedIndexBuffer(
MetalRenderer renderer,
CommandBufferScoped cbs,
int firstIndex,
int indexCount,
int convertedCount,
IndexBufferPattern pattern)
{
// Convert the index buffer using the given pattern.
int indexSize = GetIndexSize();
int firstIndexOffset = firstIndex * indexSize;
var autoBuffer = renderer.BufferManager.GetBufferTopologyConversion(cbs, _handle, _offset + firstIndexOffset, indexCount * indexSize, pattern, indexSize);
int size = convertedCount * 4;
if (autoBuffer != null)
{
DisposableBuffer buffer = autoBuffer.Get(cbs, 0, size);
return (buffer.Value, 0, MTLIndexType.UInt32);
}
return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt32);
}
private int GetIndexSize()
{
return _type switch
{
IndexType.UInt => 4,
IndexType.UShort => 2,
_ => 1,
};
}
} }
} }

View File

@ -404,6 +404,8 @@ namespace Ryujinx.Graphics.Metal
return; return;
} }
var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
if (TopologyUnsupported(_encoderStateManager.Topology)) if (TopologyUnsupported(_encoderStateManager.Topology))
{ {
var pattern = GetIndexBufferPattern(); var pattern = GetIndexBufferPattern();
@ -412,7 +414,6 @@ namespace Ryujinx.Graphics.Metal
var buffer = _renderer.BufferManager.GetBuffer(handle, false); var buffer = _renderer.BufferManager.GetBuffer(handle, false);
var mtlBuffer = buffer.Get(Cbs, 0, indexCount * sizeof(int)).Value; var mtlBuffer = buffer.Get(Cbs, 0, indexCount * sizeof(int)).Value;
var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
var renderCommandEncoder = GetOrCreateRenderEncoder(true); var renderCommandEncoder = GetOrCreateRenderEncoder(true);
renderCommandEncoder.DrawIndexedPrimitives( renderCommandEncoder.DrawIndexedPrimitives(
@ -424,7 +425,6 @@ namespace Ryujinx.Graphics.Metal
} }
else else
{ {
var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
var renderCommandEncoder = GetOrCreateRenderEncoder(true); var renderCommandEncoder = GetOrCreateRenderEncoder(true);
if (debugGroupName != String.Empty) if (debugGroupName != String.Empty)
@ -483,15 +483,26 @@ namespace Ryujinx.Graphics.Metal
return; return;
} }
// TODO: Reindex unsupported topologies MTLBuffer mtlBuffer;
if (TopologyUnsupported(_encoderStateManager.Topology)) int offset;
{ MTLIndexType type;
Logger.Warning?.Print(LogClass.Gpu, $"Drawing indexed with unsupported topology: {_encoderStateManager.Topology}"); int finalIndexCount = indexCount;
}
var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert(); var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
(MTLBuffer mtlBuffer, int offset, MTLIndexType type) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs); if (TopologyUnsupported(_encoderStateManager.Topology))
{
var pattern = GetIndexBufferPattern();
int convertedCount = pattern.GetConvertedCount(indexCount);
finalIndexCount = convertedCount;
(mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetConvertedIndexBuffer(_renderer, Cbs, firstIndex, indexCount, convertedCount, pattern);
}
else
{
(mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs);
}
if (mtlBuffer.NativePtr != IntPtr.Zero) if (mtlBuffer.NativePtr != IntPtr.Zero)
{ {
@ -499,7 +510,7 @@ namespace Ryujinx.Graphics.Metal
renderCommandEncoder.DrawIndexedPrimitives( renderCommandEncoder.DrawIndexedPrimitives(
primitiveType, primitiveType,
(ulong)indexCount, (ulong)finalIndexCount,
type, type,
mtlBuffer, mtlBuffer,
(ulong)offset, (ulong)offset,

View File

@ -19,6 +19,7 @@
<EmbeddedResource Include="Shaders\BlitMs.metal" /> <EmbeddedResource Include="Shaders\BlitMs.metal" />
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" /> <EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
<EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" /> <EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" />
<EmbeddedResource Include="Shaders\ConvertIndexBuffer.metal" />
<EmbeddedResource Include="Shaders\ColorClear.metal" /> <EmbeddedResource Include="Shaders\ColorClear.metal" />
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" /> <EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
<EmbeddedResource Include="Shaders\DepthBlit.metal" /> <EmbeddedResource Include="Shaders\DepthBlit.metal" />

View File

@ -0,0 +1,59 @@
#include <metal_stdlib>
using namespace metal;
struct IndexBufferPattern {
int pattern[8];
int primitiveVertices;
int primitiveVerticesOut;
int indexSize;
int indexSizeOut;
int baseIndex;
int indexStride;
int srcOffset;
int totalPrimitives;
};
struct InData {
uint8_t data[1];
};
struct OutData {
uint8_t data[1];
};
struct StorageBuffers {
device InData* in_data;
device OutData* out_data;
constant IndexBufferPattern* index_buffer_pattern;
};
kernel void kernelMain(device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
uint3 thread_position_in_grid [[thread_position_in_grid]])
{
int primitiveIndex = int(thread_position_in_grid.x);
if (primitiveIndex >= storage_buffers.index_buffer_pattern->totalPrimitives)
{
return;
}
int inOffset = primitiveIndex * storage_buffers.index_buffer_pattern->indexStride;
int outOffset = primitiveIndex * storage_buffers.index_buffer_pattern->primitiveVerticesOut;
for (int i = 0; i < storage_buffers.index_buffer_pattern->primitiveVerticesOut; i++)
{
int j;
int io = max(0, inOffset + storage_buffers.index_buffer_pattern->baseIndex + storage_buffers.index_buffer_pattern->pattern[i]) * storage_buffers.index_buffer_pattern->indexSize;
int oo = (outOffset + i) * storage_buffers.index_buffer_pattern->indexSizeOut;
for (j = 0; j < storage_buffers.index_buffer_pattern->indexSize; j++)
{
storage_buffers.out_data->data[oo + j] = storage_buffers.in_data->data[storage_buffers.index_buffer_pattern->srcOffset + io + j];
}
for(; j < storage_buffers.index_buffer_pattern->indexSizeOut; j++)
{
storage_buffers.out_data->data[oo + j] = uint8_t(0);
}
}
}