diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 4e5dff596..af8541fb8 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -1,7 +1,12 @@ +using Ryujinx.Graphics.Shader.Translation; + namespace Ryujinx.Graphics.GAL { public struct Capabilities { + public readonly TargetApi Api; + public readonly string VendorName; + public readonly bool HasFrontFacingBug; public readonly bool HasVectorIndexingBug; @@ -24,6 +29,8 @@ namespace Ryujinx.Graphics.GAL public readonly int StorageBufferOffsetAlignment; public Capabilities( + TargetApi api, + string vendorName, bool hasFrontFacingBug, bool hasVectorIndexingBug, bool supportsAstcCompression, @@ -43,6 +50,8 @@ namespace Ryujinx.Graphics.GAL float maximumSupportedAnisotropy, int storageBufferOffsetAlignment) { + Api = api; + VendorName = vendorName; HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; SupportsAstcCompression = supportsAstcCompression; diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs index a36d999d6..b051e9dc8 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -16,11 +16,9 @@ namespace Ryujinx.Graphics.GAL void BackgroundContextAction(Action action, bool alwaysBackground = false); - IShader CompileShader(ShaderStage stage, string code); - BufferHandle CreateBuffer(int size); - IProgram CreateProgram(IShader[] shaders, ShaderInfo info); + IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info); ISampler CreateSampler(SamplerCreateInfo info); ITexture CreateTexture(TextureCreateInfo info, float scale); diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index 67e8315b4..442a90459 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -4,7 +4,6 @@ using 
Ryujinx.Graphics.GAL.Multithreading.Commands.CounterEvent; using Ryujinx.Graphics.GAL.Multithreading.Commands.Program; using Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer; using Ryujinx.Graphics.GAL.Multithreading.Commands.Sampler; -using Ryujinx.Graphics.GAL.Multithreading.Commands.Shader; using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture; using Ryujinx.Graphics.GAL.Multithreading.Commands.Window; using System; @@ -53,8 +52,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading { _lookup[(int)CommandType.Action] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => ActionCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.CompileShader] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - CompileShaderCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.CreateBuffer] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => CreateBufferCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.CreateProgram] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => @@ -98,9 +95,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading _lookup[(int)CommandType.SamplerDispose] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => SamplerDisposeCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.ShaderDispose] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - ShaderDisposeCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.TextureCopyTo] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => TextureCopyToCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.TextureCopyToScaled] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index e0a03ce7f..5c42abd12 100644 --- 
a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -3,7 +3,6 @@ enum CommandType : byte { Action, - CompileShader, CreateBuffer, CreateProgram, CreateSampler, @@ -29,8 +28,6 @@ SamplerDispose, - ShaderDispose, - TextureCopyTo, TextureCopyToScaled, TextureCopyToSlice, diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs deleted file mode 100644 index 2bd9725d2..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CompileShaderCommand.cs +++ /dev/null @@ -1,22 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer -{ - struct CompileShaderCommand : IGALCommand - { - public CommandType CommandType => CommandType.CompileShader; - private TableRef _shader; - - public void Set(TableRef shader) - { - _shader = shader; - } - - public static void Run(ref CompileShaderCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - ThreadedShader shader = command._shader.Get(threaded); - shader.EnsureCreated(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs index 4d1cbb284..a96b3cef4 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs @@ -1,7 +1,4 @@ -using Ryujinx.Graphics.GAL.Multithreading.Resources; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer +namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer { struct CreateBufferCommand : IGALCommand { diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs 
b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs index 67cafd188..1048dc9e6 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/PreFrameCommand.cs @@ -1,6 +1,4 @@ -using System; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer +namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer { struct PreFrameCommand : IGALCommand { diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs deleted file mode 100644 index ebb2c927f..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Shader/ShaderDisposeCommand.cs +++ /dev/null @@ -1,21 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Shader -{ - struct ShaderDisposeCommand : IGALCommand - { - public CommandType CommandType => CommandType.ShaderDispose; - private TableRef _shader; - - public void Set(TableRef shader) - { - _shader = shader; - } - - public static void Run(ref ShaderDisposeCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - command._shader.Get(threaded).Base.Dispose(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs b/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs index d808fe221..7c5f03631 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Resources/Programs/SourceProgramRequest.cs @@ -6,10 +6,10 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources.Programs { public ThreadedProgram Threaded { get; set; } - private IShader[] _shaders; + private ShaderSource[] _shaders; private ShaderInfo _info; - public SourceProgramRequest(ThreadedProgram 
program, IShader[] shaders, ShaderInfo info) + public SourceProgramRequest(ThreadedProgram program, ShaderSource[] shaders, ShaderInfo info) { Threaded = program; @@ -19,14 +19,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources.Programs public IProgram Create(IRenderer renderer) { - IShader[] shaders = _shaders.Select(shader => - { - var threaded = (ThreadedShader)shader; - threaded?.EnsureCreated(); - return threaded?.Base; - }).ToArray(); - - return renderer.CreateProgram(shaders, _info); + return renderer.CreateProgram(_shaders, _info); } } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs b/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs deleted file mode 100644 index dcbecf389..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedShader.cs +++ /dev/null @@ -1,38 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Commands.Shader; -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.GAL.Multithreading.Resources -{ - class ThreadedShader : IShader - { - private ThreadedRenderer _renderer; - private ShaderStage _stage; - private string _code; - - public IShader Base; - - public ThreadedShader(ThreadedRenderer renderer, ShaderStage stage, string code) - { - _renderer = renderer; - - _stage = stage; - _code = code; - } - - internal void EnsureCreated() - { - if (_code != null && Base == null) - { - Base = _renderer.BaseRenderer.CompileShader(_stage, _code); - _code = null; - } - } - - public void Dispose() - { - _renderer.New().Set(new TableRef(_renderer, this)); - _renderer.QueueCommand(); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs index 6dc8ef386..b6acfaa83 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs @@ -1,7 +1,6 @@ using 
Ryujinx.Graphics.GAL.Multithreading.Commands; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; -using Ryujinx.Graphics.Shader; using System; using System.Linq; diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 5030fee62..63b668bac 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -250,15 +250,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading } } - public IShader CompileShader(ShaderStage stage, string code) - { - var shader = new ThreadedShader(this, stage, code); - New().Set(Ref(shader)); - QueueCommand(); - - return shader; - } - public BufferHandle CreateBuffer(int size) { BufferHandle handle = Buffers.CreateBufferHandle(); @@ -268,7 +259,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading return handle; } - public IProgram CreateProgram(IShader[] shaders, ShaderInfo info) + public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info) { var program = new ThreadedProgram(this); SourceProgramRequest request = new SourceProgramRequest(program, shaders, info); diff --git a/Ryujinx.Graphics.GAL/ShaderSource.cs b/Ryujinx.Graphics.GAL/ShaderSource.cs new file mode 100644 index 000000000..13b92f20a --- /dev/null +++ b/Ryujinx.Graphics.GAL/ShaderSource.cs @@ -0,0 +1,29 @@ +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.GAL +{ + public struct ShaderSource + { + public string Code { get; } + public byte[] BinaryCode { get; } + public ShaderStage Stage { get; } + public TargetLanguage Language { get; } + + public ShaderSource(string code, byte[] binaryCode, ShaderStage stage, TargetLanguage language) + { + Code = code; + BinaryCode = binaryCode; + Stage = stage; + Language = language; + } + + public ShaderSource(string code, ShaderStage stage, TargetLanguage language) : this(code, null, 
stage, language) + { + } + + public ShaderSource(byte[] binaryCode, ShaderStage stage, TargetLanguage language) : this(null, binaryCode, stage, language) + { + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 00015c404..87c14da8f 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -124,24 +124,20 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB; ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB; - GpuAccessorState gas = new GpuAccessorState( + GpuChannelPoolState poolState = new GpuChannelPoolState( texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex, - _state.State.SetBindlessTextureConstantBufferSlotSelect, - false, - PrimitiveTopology.Points, - default); + _state.State.SetBindlessTextureConstantBufferSlotSelect); - ShaderBundle cs = memoryManager.Physical.ShaderCache.GetComputeShader( - _channel, - gas, - shaderGpuVa, + GpuChannelComputeState computeState = new GpuChannelComputeState( qmd.CtaThreadDimension0, qmd.CtaThreadDimension1, qmd.CtaThreadDimension2, localMemorySize, sharedMemorySize); + CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa); + _context.Renderer.Pipeline.SetProgram(cs.HostProgram); _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex); diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index 8d67d0fd7..c9a18f144 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -7,7 +7,6 @@ using 
Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Texture; using System; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Engine.Threed { @@ -30,6 +29,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private readonly StateUpdateTracker _updateTracker; private readonly ShaderProgramInfo[] _currentProgramInfo; + private ShaderSpecializationState _shaderSpecState; private bool _vtgWritesRtLayer; private byte _vsClipDistancesWritten; @@ -195,6 +195,17 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Update() { + // If any state that the shader depends on changed, + // then we may need to compile/bind a different version + // of the shader for the new state. + if (_shaderSpecState != null) + { + if (!_shaderSpecState.MatchesGraphics(_channel, GetPoolState())) + { + ForceShaderUpdate(); + } + } + // The vertex buffer size is calculated using a different // method when doing indexed draws, so we need to make sure // to update the vertex buffers if we are doing a regular @@ -1065,108 +1076,127 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// private void UpdateShaderState() { + var shaderCache = _channel.MemoryManager.Physical.ShaderCache; + + _vtgWritesRtLayer = false; + ShaderAddresses addresses = new ShaderAddresses(); - - Span addressesSpan = MemoryMarshal.CreateSpan(ref addresses, 1); - - Span addressesArray = MemoryMarshal.Cast(addressesSpan); + Span addressesSpan = addresses.AsSpan(); ulong baseAddress = _state.State.ShaderBaseAddress.Pack(); for (int index = 0; index < 6; index++) { var shader = _state.State.ShaderState[index]; - if (!shader.UnpackEnable() && index != 1) { continue; } - addressesArray[index] = baseAddress + shader.Offset; + addressesSpan[index] = baseAddress + shader.Offset; } - GpuAccessorState gas = new GpuAccessorState( - _state.State.TexturePoolState.Address.Pack(), - _state.State.TexturePoolState.MaximumId, - 
(int)_state.State.TextureBufferIndex, - _state.State.EarlyZForce, - _drawState.Topology, - _state.State.TessMode); + GpuChannelPoolState poolState = GetPoolState(); + GpuChannelGraphicsState graphicsState = GetGraphicsState(); - ShaderBundle gs = _channel.MemoryManager.Physical.ShaderCache.GetGraphicsShader(ref _state.State, _channel, gas, addresses); + CachedShaderProgram gs = shaderCache.GetGraphicsShader(ref _state.State, _channel, poolState, graphicsState, addresses); + + _shaderSpecState = gs.SpecializationState; byte oldVsClipDistancesWritten = _vsClipDistancesWritten; - _drawState.VsUsesInstanceId = gs.Shaders[0]?.Info.UsesInstanceId ?? false; - _vsClipDistancesWritten = gs.Shaders[0]?.Info.ClipDistancesWritten ?? 0; - _vtgWritesRtLayer = false; + _drawState.VsUsesInstanceId = gs.Shaders[1]?.Info.UsesInstanceId ?? false; + _vsClipDistancesWritten = gs.Shaders[1]?.Info.ClipDistancesWritten ?? 0; if (oldVsClipDistancesWritten != _vsClipDistancesWritten) { UpdateUserClipState(); } - for (int stage = 0; stage < Constants.ShaderStages; stage++) + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { - ShaderProgramInfo info = gs.Shaders[stage]?.Info; - - _currentProgramInfo[stage] = info; - - if (info == null) - { - _channel.TextureManager.RentGraphicsTextureBindings(stage, 0); - _channel.TextureManager.RentGraphicsImageBindings(stage, 0); - _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, null); - _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, null); - continue; - } - - Span textureBindings = _channel.TextureManager.RentGraphicsTextureBindings(stage, info.Textures.Count); - - if (info.UsesRtLayer) - { - _vtgWritesRtLayer = true; - } - - for (int index = 0; index < info.Textures.Count; index++) - { - var descriptor = info.Textures[index]; - - Target target = ShaderTexture.GetTarget(descriptor.Type); - - textureBindings[index] = new TextureBindingInfo( - target, - descriptor.Binding, - descriptor.CbufSlot, - 
descriptor.HandleIndex, - descriptor.Flags); - } - - TextureBindingInfo[] imageBindings = _channel.TextureManager.RentGraphicsImageBindings(stage, info.Images.Count); - - for (int index = 0; index < info.Images.Count; index++) - { - var descriptor = info.Images[index]; - - Target target = ShaderTexture.GetTarget(descriptor.Type); - Format format = ShaderTexture.GetFormat(descriptor.Format); - - imageBindings[index] = new TextureBindingInfo( - target, - format, - descriptor.Binding, - descriptor.CbufSlot, - descriptor.HandleIndex, - descriptor.Flags); - } - - _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, info.SBuffers); - _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, info.CBuffers); + UpdateStageBindings(stageIndex, gs.Shaders[stageIndex + 1]?.Info); } _context.Renderer.Pipeline.SetProgram(gs.HostProgram); } + private void UpdateStageBindings(int stage, ShaderProgramInfo info) + { + _currentProgramInfo[stage] = info; + + if (info == null) + { + _channel.TextureManager.RentGraphicsTextureBindings(stage, 0); + _channel.TextureManager.RentGraphicsImageBindings(stage, 0); + _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, null); + _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, null); + return; + } + + Span textureBindings = _channel.TextureManager.RentGraphicsTextureBindings(stage, info.Textures.Count); + + if (info.UsesRtLayer) + { + _vtgWritesRtLayer = true; + } + + for (int index = 0; index < info.Textures.Count; index++) + { + var descriptor = info.Textures[index]; + + Target target = ShaderTexture.GetTarget(descriptor.Type); + + textureBindings[index] = new TextureBindingInfo( + target, + descriptor.Binding, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Flags); + } + + TextureBindingInfo[] imageBindings = _channel.TextureManager.RentGraphicsImageBindings(stage, info.Images.Count); + + for (int index = 0; index < info.Images.Count; index++) + { + var descriptor = info.Images[index]; + + 
Target target = ShaderTexture.GetTarget(descriptor.Type); + Format format = ShaderTexture.GetFormat(descriptor.Format); + + imageBindings[index] = new TextureBindingInfo( + target, + format, + descriptor.Binding, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Flags); + } + + _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, info.SBuffers); + _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, info.CBuffers); + } + + private GpuChannelPoolState GetPoolState() + { + return new GpuChannelPoolState( + _state.State.TexturePoolState.Address.Pack(), + _state.State.TexturePoolState.MaximumId, + (int)_state.State.TextureBufferIndex); + } + + /// + /// Gets the current GPU channel state for shader creation or compatibility verification. + /// + /// Current GPU channel state + private GpuChannelGraphicsState GetGraphicsState() + { + return new GpuChannelGraphicsState( + _state.State.EarlyZForce, + _drawState.Topology, + _state.State.TessMode); + } + /// /// Forces the shaders to be rebound on the next draw. /// diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index e6697b3ae..66077c3bf 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -238,13 +238,13 @@ namespace Ryujinx.Graphics.Gpu /// /// Initialize the GPU shader cache. 
/// - public void InitializeShaderCache() + public void InitializeShaderCache(CancellationToken cancellationToken) { HostInitalized.WaitOne(); foreach (var physicalMemory in PhysicalMemoryRegistry.Values) { - physicalMemory.ShaderCache.Initialize(); + physicalMemory.ShaderCache.Initialize(cancellationToken); } } diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index ae27c7129..0ac6160d9 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -115,6 +115,73 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Gets a read-only span of data from GPU mapped memory, up to the entire range specified, + /// or the last mapped page if the range is not fully mapped. + /// + /// GPU virtual address where the data is located + /// Size of the data + /// True if read tracking is triggered on the span + /// The span of the data at the specified memory location + public ReadOnlySpan GetSpanMapped(ulong va, int size, bool tracked = false) + { + bool isContiguous = true; + int mappedSize; + + if (ValidateAddress(va) && GetPte(va) != PteUnmapped && Physical.IsMapped(Translate(va))) + { + ulong endVa = va + (ulong)size; + ulong endVaAligned = (endVa + PageMask) & ~PageMask; + ulong currentVa = va & ~PageMask; + + int pages = (int)((endVaAligned - currentVa) / PageSize); + + for (int page = 0; page < pages - 1; page++) + { + ulong nextVa = currentVa + PageSize; + ulong nextPa = Translate(nextVa); + + if (!ValidateAddress(nextVa) || GetPte(nextVa) == PteUnmapped || !Physical.IsMapped(nextPa)) + { + break; + } + + if (Translate(currentVa) + PageSize != nextPa) + { + isContiguous = false; + } + + currentVa += PageSize; + } + + currentVa += PageSize; + + if (currentVa > endVa) + { + currentVa = endVa; + } + + mappedSize = (int)(currentVa - va); + } + else + { + return ReadOnlySpan.Empty; + } + + if (isContiguous) + { + return Physical.GetSpan(Translate(va), mappedSize, 
tracked); + } + else + { + Span data = new byte[mappedSize]; + + ReadImpl(va, data, tracked); + + return data; + } + } + /// /// Reads data from a possibly non-contiguous region of GPU mapped memory. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs index 57590fb38..155cba0f5 100644 --- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs +++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs @@ -341,9 +341,9 @@ namespace Ryujinx.Graphics.Gpu.Memory } /// - /// Checks if the page at a given address is mapped on CPU memory. + /// Checks if a given memory page is mapped. /// - /// CPU virtual address of the page to check + /// CPU virtual address of the page /// True if mapped, false otherwise public bool IsMapped(ulong address) { diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs index 464436ea0..d16afb650 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs @@ -2,11 +2,8 @@ using Ryujinx.Common; using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; using Ryujinx.Graphics.Shader; -using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; using System.IO; @@ -20,70 +17,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache /// static class CacheHelper { - /// - /// Try to read the manifest header from a given file path. 
- /// - /// The path to the manifest file - /// The manifest header read - /// Return true if the manifest header was read - public static bool TryReadManifestHeader(string manifestPath, out CacheManifestHeader header) - { - header = default; - - if (File.Exists(manifestPath)) - { - Memory rawManifest = File.ReadAllBytes(manifestPath); - - if (MemoryMarshal.TryRead(rawManifest.Span, out header)) - { - return true; - } - } - - return false; - } - - /// - /// Try to read the manifest from a given file path. - /// - /// The path to the manifest file - /// The graphics api used by the cache - /// The hash type of the cache - /// The manifest header read - /// The entries read from the cache manifest - /// Return true if the manifest was read - public static bool TryReadManifestFile(string manifestPath, CacheGraphicsApi graphicsApi, CacheHashType hashType, out CacheManifestHeader header, out HashSet entries) - { - header = default; - entries = new HashSet(); - - if (File.Exists(manifestPath)) - { - Memory rawManifest = File.ReadAllBytes(manifestPath); - - if (MemoryMarshal.TryRead(rawManifest.Span, out header)) - { - Memory hashTableRaw = rawManifest.Slice(Unsafe.SizeOf()); - - bool isValid = header.IsValid(graphicsApi, hashType, hashTableRaw.Span); - - if (isValid) - { - ReadOnlySpan hashTable = MemoryMarshal.Cast(hashTableRaw.Span); - - foreach (Hash128 hash in hashTable) - { - entries.Add(hash); - } - } - - return isValid; - } - } - - return false; - } - /// /// Compute a cache manifest from runtime data. /// @@ -246,82 +179,23 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache return null; } - /// - /// Compute the guest program code for usage while dumping to disk or hash. 
- /// - /// The guest shader entries to use - /// The transform feedback descriptors - /// Used to determine if the guest program code is generated for hashing - /// The guest program code for usage while dumping to disk or hash - private static byte[] ComputeGuestProgramCode(ReadOnlySpan cachedShaderEntries, TransformFeedbackDescriptor[] tfd, bool forHashCompute = false) - { - using (MemoryStream stream = new MemoryStream()) - { - BinaryWriter writer = new BinaryWriter(stream); - - foreach (GuestShaderCacheEntry cachedShaderEntry in cachedShaderEntries) - { - if (cachedShaderEntry != null) - { - // Code (and Code A if present) - stream.Write(cachedShaderEntry.Code); - - if (forHashCompute) - { - // Guest GPU accessor header (only write this for hashes, already present in the header for dumps) - writer.WriteStruct(cachedShaderEntry.Header.GpuAccessorHeader); - } - - // Texture descriptors - foreach (GuestTextureDescriptor textureDescriptor in cachedShaderEntry.TextureDescriptors.Values) - { - writer.WriteStruct(textureDescriptor); - } - } - } - - // Transform feedback - if (tfd != null) - { - foreach (TransformFeedbackDescriptor transform in tfd) - { - writer.WriteStruct(new GuestShaderCacheTransformFeedbackHeader(transform.BufferIndex, transform.Stride, transform.VaryingLocations.Length)); - writer.Write(transform.VaryingLocations); - } - } - - return stream.ToArray(); - } - } - - /// - /// Compute a guest hash from shader entries. - /// - /// The guest shader entries to use - /// The optional transform feedback descriptors - /// A guest hash from shader entries - public static Hash128 ComputeGuestHashFromCache(ReadOnlySpan cachedShaderEntries, TransformFeedbackDescriptor[] tfd = null) - { - return XXHash128.ComputeHash(ComputeGuestProgramCode(cachedShaderEntries, tfd, true)); - } - /// /// Read transform feedback descriptors from guest. 
/// /// The raw guest transform feedback descriptors /// The guest shader program header /// The transform feedback descriptors read from guest - public static TransformFeedbackDescriptor[] ReadTransformFeedbackInformation(ref ReadOnlySpan data, GuestShaderCacheHeader header) + public static TransformFeedbackDescriptorOld[] ReadTransformFeedbackInformation(ref ReadOnlySpan data, GuestShaderCacheHeader header) { if (header.TransformFeedbackCount != 0) { - TransformFeedbackDescriptor[] result = new TransformFeedbackDescriptor[header.TransformFeedbackCount]; + TransformFeedbackDescriptorOld[] result = new TransformFeedbackDescriptorOld[header.TransformFeedbackCount]; for (int i = 0; i < result.Length; i++) { GuestShaderCacheTransformFeedbackHeader feedbackHeader = MemoryMarshal.Read(data); - result[i] = new TransformFeedbackDescriptor(feedbackHeader.BufferIndex, feedbackHeader.Stride, data.Slice(Unsafe.SizeOf(), feedbackHeader.VaryingLocationsLength).ToArray()); + result[i] = new TransformFeedbackDescriptorOld(feedbackHeader.BufferIndex, feedbackHeader.Stride, data.Slice(Unsafe.SizeOf(), feedbackHeader.VaryingLocationsLength).ToArray()); data = data.Slice(Unsafe.SizeOf() + feedbackHeader.VaryingLocationsLength); } @@ -332,205 +206,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache return null; } - /// - /// Builds gpu state flags using information from the given gpu accessor. - /// - /// The gpu accessor - /// The gpu state flags - private static GuestGpuStateFlags GetGpuStateFlags(IGpuAccessor gpuAccessor) - { - GuestGpuStateFlags flags = 0; - - if (gpuAccessor.QueryEarlyZForce()) - { - flags |= GuestGpuStateFlags.EarlyZForce; - } - - return flags; - } - - /// - /// Packs the tessellation parameters from the gpu accessor. 
- /// - /// The gpu accessor - /// The packed tessellation parameters - private static byte GetTessellationModePacked(IGpuAccessor gpuAccessor) - { - byte value; - - value = (byte)((int)gpuAccessor.QueryTessPatchType() & 3); - value |= (byte)(((int)gpuAccessor.QueryTessSpacing() & 3) << 2); - - if (gpuAccessor.QueryTessCw()) - { - value |= 0x10; - } - - return value; - } - - /// - /// Create a new instance of from an gpu accessor. - /// - /// The gpu accessor - /// A new instance of - public static GuestGpuAccessorHeader CreateGuestGpuAccessorCache(IGpuAccessor gpuAccessor) - { - return new GuestGpuAccessorHeader - { - ComputeLocalSizeX = gpuAccessor.QueryComputeLocalSizeX(), - ComputeLocalSizeY = gpuAccessor.QueryComputeLocalSizeY(), - ComputeLocalSizeZ = gpuAccessor.QueryComputeLocalSizeZ(), - ComputeLocalMemorySize = gpuAccessor.QueryComputeLocalMemorySize(), - ComputeSharedMemorySize = gpuAccessor.QueryComputeSharedMemorySize(), - PrimitiveTopology = gpuAccessor.QueryPrimitiveTopology(), - TessellationModePacked = GetTessellationModePacked(gpuAccessor), - StateFlags = GetGpuStateFlags(gpuAccessor) - }; - } - - /// - /// Create guest shader cache entries from the runtime contexts. - /// - /// The GPU channel in use - /// The runtime contexts - /// Guest shader cahe entries from the runtime contexts - public static GuestShaderCacheEntry[] CreateShaderCacheEntries(GpuChannel channel, ReadOnlySpan shaderContexts) - { - MemoryManager memoryManager = channel.MemoryManager; - - int startIndex = shaderContexts.Length > 1 ? 1 : 0; - - GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[shaderContexts.Length - startIndex]; - - for (int i = startIndex; i < shaderContexts.Length; i++) - { - TranslatorContext context = shaderContexts[i]; - - if (context == null) - { - continue; - } - - GpuAccessor gpuAccessor = context.GpuAccessor as GpuAccessor; - - ulong cb1DataAddress; - int cb1DataSize = gpuAccessor?.Cb1DataSize ?? 
0; - - if (context.Stage == ShaderStage.Compute) - { - cb1DataAddress = channel.BufferManager.GetComputeUniformBufferAddress(1); - } - else - { - int stageIndex = context.Stage switch - { - ShaderStage.TessellationControl => 1, - ShaderStage.TessellationEvaluation => 2, - ShaderStage.Geometry => 3, - ShaderStage.Fragment => 4, - _ => 0 - }; - - cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, 1); - } - - int size = context.Size; - - TranslatorContext translatorContext2 = i == 1 ? shaderContexts[0] : null; - - int sizeA = translatorContext2 != null ? translatorContext2.Size : 0; - - byte[] code = new byte[size + cb1DataSize + sizeA]; - - memoryManager.GetSpan(context.Address, size).CopyTo(code); - - if (cb1DataAddress != 0 && cb1DataSize != 0) - { - memoryManager.Physical.GetSpan(cb1DataAddress, cb1DataSize).CopyTo(code.AsSpan(size, cb1DataSize)); - } - - if (translatorContext2 != null) - { - memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan(size + cb1DataSize, sizeA)); - } - - GuestGpuAccessorHeader gpuAccessorHeader = CreateGuestGpuAccessorCache(context.GpuAccessor); - - if (gpuAccessor != null) - { - gpuAccessorHeader.TextureDescriptorCount = context.TextureHandlesForCache.Count; - } - - GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader( - context.Stage, - size + cb1DataSize, - sizeA, - cb1DataSize, - gpuAccessorHeader); - - GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code); - - if (gpuAccessor != null) - { - foreach (int textureHandle in context.TextureHandlesForCache) - { - GuestTextureDescriptor textureDescriptor = ((Image.TextureDescriptor)gpuAccessor.GetTextureDescriptor(textureHandle, -1)).ToCache(); - - textureDescriptor.Handle = (uint)textureHandle; - - entry.TextureDescriptors.Add(textureHandle, textureDescriptor); - } - } - - entries[i - startIndex] = entry; - } - - return entries; - } - - /// - /// Create a guest shader program. 
- /// - /// The entries composing the guest program dump - /// The transform feedback descriptors in use - /// The resulting guest shader program - public static byte[] CreateGuestProgramDump(GuestShaderCacheEntry[] shaderCacheEntries, TransformFeedbackDescriptor[] tfd = null) - { - using (MemoryStream resultStream = new MemoryStream()) - { - BinaryWriter resultStreamWriter = new BinaryWriter(resultStream); - - byte transformFeedbackCount = 0; - - if (tfd != null) - { - transformFeedbackCount = (byte)tfd.Length; - } - - // Header - resultStreamWriter.WriteStruct(new GuestShaderCacheHeader((byte)shaderCacheEntries.Length, transformFeedbackCount)); - - // Write all entries header - foreach (GuestShaderCacheEntry entry in shaderCacheEntries) - { - if (entry == null) - { - resultStreamWriter.WriteStruct(new GuestShaderCacheEntryHeader()); - } - else - { - resultStreamWriter.WriteStruct(entry.Header); - } - } - - // Finally, write all program code and all transform feedback information. - resultStreamWriter.Write(ComputeGuestProgramCode(shaderCacheEntries, tfd)); - - return resultStream.ToArray(); - } - } - /// /// Save temporary files not in archive. 
/// diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs index 3fc11e822..e67221e79 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs @@ -47,8 +47,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache string baseCacheDirectory = CacheHelper.GetBaseCacheDirectory(titleId); - CacheMigration.Run(baseCacheDirectory, graphicsApi, hashType, shaderProvider); - _guestProgramCache = new CacheCollection(baseCacheDirectory, _hashType, CacheGraphicsApi.Guest, "", "program", GuestCacheVersion); _hostProgramCache = new CacheCollection(baseCacheDirectory, _hashType, _graphicsApi, _shaderProvider, "host", shaderCodeGenVersion); } diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs deleted file mode 100644 index 5b4a17135..000000000 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheMigration.cs +++ /dev/null @@ -1,175 +0,0 @@ -using ICSharpCode.SharpZipLib.Zip; -using Ryujinx.Common; -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; -using System; -using System.Collections.Generic; -using System.IO; - -namespace Ryujinx.Graphics.Gpu.Shader.Cache -{ - /// - /// Class handling shader cache migrations. - /// - static class CacheMigration - { - /// - /// Check if the given cache version need to recompute its hash. 
- /// - /// The version in use - /// The new version after migration - /// True if a hash recompute is needed - public static bool NeedHashRecompute(ulong version, out ulong newVersion) - { - const ulong TargetBrokenVersion = 1717; - const ulong TargetFixedVersion = 1759; - - newVersion = TargetFixedVersion; - - if (version == TargetBrokenVersion) - { - return true; - } - - return false; - } - - private class StreamZipEntryDataSource : IStaticDataSource - { - private readonly ZipFile Archive; - private readonly ZipEntry Entry; - public StreamZipEntryDataSource(ZipFile archive, ZipEntry entry) - { - Archive = archive; - Entry = entry; - } - - public Stream GetSource() - { - return Archive.GetInputStream(Entry); - } - } - - /// - /// Move a file with the name of a given hash to another in the cache archive. - /// - /// The archive in use - /// The old key - /// The new key - private static void MoveEntry(ZipFile archive, Hash128 oldKey, Hash128 newKey) - { - ZipEntry oldGuestEntry = archive.GetEntry($"{oldKey}"); - - if (oldGuestEntry != null) - { - archive.Add(new StreamZipEntryDataSource(archive, oldGuestEntry), $"{newKey}", CompressionMethod.Deflated); - archive.Delete(oldGuestEntry); - } - } - - /// - /// Recompute all the hashes of a given cache. 
- /// - /// The guest cache directory path - /// The host cache directory path - /// The graphics api in use - /// The hash type in use - /// The version to write in the host and guest manifest after migration - private static void RecomputeHashes(string guestBaseCacheDirectory, string hostBaseCacheDirectory, CacheGraphicsApi graphicsApi, CacheHashType hashType, ulong newVersion) - { - string guestManifestPath = CacheHelper.GetManifestPath(guestBaseCacheDirectory); - string hostManifestPath = CacheHelper.GetManifestPath(hostBaseCacheDirectory); - - if (CacheHelper.TryReadManifestFile(guestManifestPath, CacheGraphicsApi.Guest, hashType, out _, out HashSet guestEntries)) - { - CacheHelper.TryReadManifestFile(hostManifestPath, graphicsApi, hashType, out _, out HashSet hostEntries); - - Logger.Info?.Print(LogClass.Gpu, "Shader cache hashes need to be recomputed, performing migration..."); - - string guestArchivePath = CacheHelper.GetArchivePath(guestBaseCacheDirectory); - string hostArchivePath = CacheHelper.GetArchivePath(hostBaseCacheDirectory); - - ZipFile guestArchive = new ZipFile(File.Open(guestArchivePath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None)); - ZipFile hostArchive = new ZipFile(File.Open(hostArchivePath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None)); - - CacheHelper.EnsureArchiveUpToDate(guestBaseCacheDirectory, guestArchive, guestEntries); - CacheHelper.EnsureArchiveUpToDate(hostBaseCacheDirectory, hostArchive, hostEntries); - - int programIndex = 0; - - HashSet newEntries = new HashSet(); - - foreach (Hash128 oldHash in guestEntries) - { - byte[] guestProgram = CacheHelper.ReadFromArchive(guestArchive, oldHash); - - Logger.Info?.Print(LogClass.Gpu, $"Migrating shader {oldHash} ({programIndex + 1} / {guestEntries.Count})"); - - if (guestProgram != null) - { - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out 
GuestShaderCacheHeader fileHeader); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - Hash128 newHash = CacheHelper.ComputeGuestHashFromCache(cachedShaderEntries, tfd); - - if (newHash != oldHash) - { - MoveEntry(guestArchive, oldHash, newHash); - MoveEntry(hostArchive, oldHash, newHash); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Same hashes for shader {oldHash}"); - } - - newEntries.Add(newHash); - } - - programIndex++; - } - - byte[] newGuestManifestContent = CacheHelper.ComputeManifest(newVersion, CacheGraphicsApi.Guest, hashType, newEntries); - byte[] newHostManifestContent = CacheHelper.ComputeManifest(newVersion, graphicsApi, hashType, newEntries); - - File.WriteAllBytes(guestManifestPath, newGuestManifestContent); - File.WriteAllBytes(hostManifestPath, newHostManifestContent); - - guestArchive.CommitUpdate(); - hostArchive.CommitUpdate(); - - guestArchive.Close(); - hostArchive.Close(); - } - } - - /// - /// Check and run cache migration if needed. 
- /// - /// The base path of the cache - /// The graphics api in use - /// The hash type in use - /// The shader provider name of the cache - public static void Run(string baseCacheDirectory, CacheGraphicsApi graphicsApi, CacheHashType hashType, string shaderProvider) - { - string guestBaseCacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, CacheGraphicsApi.Guest, "", "program"); - string hostBaseCacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, graphicsApi, shaderProvider, "host"); - - string guestArchivePath = CacheHelper.GetArchivePath(guestBaseCacheDirectory); - string hostArchivePath = CacheHelper.GetArchivePath(hostBaseCacheDirectory); - - bool isReadOnly = CacheHelper.IsArchiveReadOnly(guestArchivePath) || CacheHelper.IsArchiveReadOnly(hostArchivePath); - - if (!isReadOnly && CacheHelper.TryReadManifestHeader(CacheHelper.GetManifestPath(guestBaseCacheDirectory), out CacheManifestHeader header)) - { - if (NeedHashRecompute(header.Version, out ulong newVersion)) - { - RecomputeHashes(guestBaseCacheDirectory, hostBaseCacheDirectory, graphicsApi, hashType, newVersion); - } - } - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs index 819c6bcc9..fe79acb34 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs @@ -96,6 +96,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition SBuffers, Textures, Images, + default, Header.UseFlags.HasFlag(UseFlags.InstanceId), Header.UseFlags.HasFlag(UseFlags.RtLayer), Header.ClipDistancesWritten, @@ -160,7 +161,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition /// The host shader program /// The shaders code holder /// Raw data of a new host shader cache file - internal static byte[] Create(ReadOnlySpan programCode, ShaderCodeHolder[] codeHolders) + internal static byte[] 
Create(ReadOnlySpan programCode, CachedShaderStage[] codeHolders) { HostShaderCacheHeader header = new HostShaderCacheHeader((byte)codeHolders.Length, programCode.Length); diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs new file mode 100644 index 000000000..27fac8f37 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs @@ -0,0 +1,255 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.Cache +{ + /// + /// Class handling shader cache migrations. + /// + static class Migration + { + // Last codegen version before the migration to the new cache. + private const ulong ShaderCodeGenVersion = 3054; + + /// + /// Migrates from the old cache format to the new one. + /// + /// GPU context + /// Disk cache host storage (used to create the new shader files) + /// Number of migrated shaders + public static int MigrateFromLegacyCache(GpuContext context, DiskCacheHostStorage hostStorage) + { + string baseCacheDirectory = CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId); + string cacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, CacheGraphicsApi.Guest, "", "program"); + + // If the directory does not exist, we have no old cache. + // Exist early as the CacheManager constructor will create the directories. 
+ if (!Directory.Exists(cacheDirectory)) + { + return 0; + } + + if (GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null) + { + CacheManager cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion); + + bool isReadOnly = cacheManager.IsReadOnly; + + HashSet invalidEntries = null; + + if (isReadOnly) + { + Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)"); + } + else + { + invalidEntries = new HashSet(); + } + + ReadOnlySpan guestProgramList = cacheManager.GetGuestProgramList(); + + for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++) + { + Hash128 key = guestProgramList[programIndex]; + + byte[] guestProgram = cacheManager.GetGuestProgramByHash(ref key); + + if (guestProgram == null) + { + Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + + continue; + } + + ReadOnlySpan guestProgramReadOnlySpan = guestProgram; + + ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); + + if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) + { + Debug.Assert(cachedShaderEntries.Length == 1); + + GuestShaderCacheEntry entry = cachedShaderEntries[0]; + + byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); + + Span codeSpan = entry.Code; + byte[] cb1Data = codeSpan.Slice(codeSpan.Length - entry.Header.Cb1DataSize).ToArray(); + + ShaderProgramInfo info = new ShaderProgramInfo( + Array.Empty(), + Array.Empty(), + Array.Empty(), + Array.Empty(), + ShaderStage.Compute, + false, + false, + 0, + 0); + + GpuChannelComputeState computeState = new GpuChannelComputeState( + entry.Header.GpuAccessorHeader.ComputeLocalSizeX, + entry.Header.GpuAccessorHeader.ComputeLocalSizeY, + 
entry.Header.GpuAccessorHeader.ComputeLocalSizeZ, + entry.Header.GpuAccessorHeader.ComputeLocalMemorySize, + entry.Header.GpuAccessorHeader.ComputeSharedMemorySize); + + ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + + foreach (var td in entry.TextureDescriptors) + { + var handle = td.Key; + var data = td.Value; + + specState.RegisterTexture( + 0, + handle, + -1, + data.UnpackFormat(), + data.UnpackSrgb(), + data.UnpackTextureTarget(), + data.UnpackTextureCoordNormalized()); + } + + CachedShaderStage shader = new CachedShaderStage(info, code, cb1Data); + CachedShaderProgram program = new CachedShaderProgram(null, specState, shader); + + hostStorage.AddShader(context, program, ReadOnlySpan.Empty); + } + else + { + Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); + + CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; + List shaderPrograms = new List(); + + TransformFeedbackDescriptorOld[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); + + GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); + + GuestGpuAccessorHeader accessorHeader = entries[0].Header.GpuAccessorHeader; + + TessMode tessMode = new TessMode(); + + int tessPatchType = accessorHeader.TessellationModePacked & 3; + int tessSpacing = (accessorHeader.TessellationModePacked >> 2) & 3; + bool tessCw = (accessorHeader.TessellationModePacked & 0x10) != 0; + + tessMode.Packed = (uint)tessPatchType; + tessMode.Packed |= (uint)(tessSpacing << 4); + + if (tessCw) + { + tessMode.Packed |= 0x100; + } + + PrimitiveTopology topology = accessorHeader.PrimitiveTopology switch + { + InputTopology.Lines => PrimitiveTopology.Lines, + InputTopology.LinesAdjacency => PrimitiveTopology.LinesAdjacency, + InputTopology.Triangles => PrimitiveTopology.Triangles, + InputTopology.TrianglesAdjacency => PrimitiveTopology.TrianglesAdjacency, + _ => PrimitiveTopology.Points + }; + + 
GpuChannelGraphicsState graphicsState = new GpuChannelGraphicsState( + accessorHeader.StateFlags.HasFlag(GuestGpuStateFlags.EarlyZForce), + topology, + tessMode); + + TransformFeedbackDescriptor[] tfdNew = null; + + if (tfd != null) + { + tfdNew = new TransformFeedbackDescriptor[tfd.Length]; + + for (int tfIndex = 0; tfIndex < tfd.Length; tfIndex++) + { + Array32 varyingLocations = new Array32(); + Span varyingLocationsSpan = MemoryMarshal.Cast(varyingLocations.ToSpan()); + tfd[tfIndex].VaryingLocations.CopyTo(varyingLocationsSpan.Slice(0, tfd[tfIndex].VaryingLocations.Length)); + + tfdNew[tfIndex] = new TransformFeedbackDescriptor( + tfd[tfIndex].BufferIndex, + tfd[tfIndex].Stride, + tfd[tfIndex].VaryingLocations.Length, + ref varyingLocations); + } + } + + ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew); + + for (int i = 0; i < entries.Length; i++) + { + GuestShaderCacheEntry entry = entries[i]; + + if (entry == null) + { + continue; + } + + ShaderProgramInfo info = new ShaderProgramInfo( + Array.Empty(), + Array.Empty(), + Array.Empty(), + Array.Empty(), + (ShaderStage)(i + 1), + false, + false, + 0, + 0); + + // NOTE: Vertex B comes first in the shader cache. + byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); + byte[] code2 = entry.Header.SizeA != 0 ? 
entry.Code.AsSpan(entry.Header.Size, entry.Header.SizeA).ToArray() : null; + + Span codeSpan = entry.Code; + byte[] cb1Data = codeSpan.Slice(codeSpan.Length - entry.Header.Cb1DataSize).ToArray(); + + shaders[i + 1] = new CachedShaderStage(info, code, cb1Data); + + if (code2 != null) + { + shaders[0] = new CachedShaderStage(null, code2, cb1Data); + } + + foreach (var td in entry.TextureDescriptors) + { + var handle = td.Key; + var data = td.Value; + + specState.RegisterTexture( + i, + handle, + -1, + data.UnpackFormat(), + data.UnpackSrgb(), + data.UnpackTextureTarget(), + data.UnpackTextureCoordNormalized()); + } + } + + CachedShaderProgram program = new CachedShaderProgram(null, specState, shaders); + + hostStorage.AddShader(context, program, ReadOnlySpan.Empty); + } + } + + return guestProgramList.Length; + } + + return 0; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs new file mode 100644 index 000000000..5e9c6711d --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/TransformFeedbackDescriptorOld.cs @@ -0,0 +1,19 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Shader.Cache +{ + struct TransformFeedbackDescriptorOld + { + public int BufferIndex { get; } + public int Stride { get; } + + public byte[] VaryingLocations { get; } + + public TransformFeedbackDescriptorOld(int bufferIndex, int stride, byte[] varyingLocations) + { + BufferIndex = bufferIndex; + Stride = stride; + VaryingLocations = varyingLocations ?? 
throw new ArgumentNullException(nameof(varyingLocations)); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs deleted file mode 100644 index d65349a52..000000000 --- a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs +++ /dev/null @@ -1,222 +0,0 @@ -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; -using Ryujinx.Graphics.Shader; -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - class CachedGpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor - { - private readonly ReadOnlyMemory _data; - private readonly ReadOnlyMemory _cb1Data; - private readonly GuestGpuAccessorHeader _header; - private readonly Dictionary _textureDescriptors; - private readonly TransformFeedbackDescriptor[] _tfd; - - /// - /// Creates a new instance of the cached GPU state accessor for shader translation. - /// - /// GPU context - /// The data of the shader - /// The constant buffer 1 data of the shader - /// The cache of the GPU accessor - /// The cache of the texture descriptors - public CachedGpuAccessor( - GpuContext context, - ReadOnlyMemory data, - ReadOnlyMemory cb1Data, - GuestGpuAccessorHeader header, - IReadOnlyDictionary guestTextureDescriptors, - TransformFeedbackDescriptor[] tfd) : base(context) - { - _data = data; - _cb1Data = cb1Data; - _header = header; - _textureDescriptors = new Dictionary(); - - foreach (KeyValuePair guestTextureDescriptor in guestTextureDescriptors) - { - _textureDescriptors.Add(guestTextureDescriptor.Key, guestTextureDescriptor.Value); - } - - _tfd = tfd; - } - - /// - /// Reads data from the constant buffer 1. - /// - /// Offset in bytes to read from - /// Value at the given offset - public uint ConstantBuffer1Read(int offset) - { - return MemoryMarshal.Cast(_cb1Data.Span.Slice(offset))[0]; - } - - /// - /// Prints a log message. 
- /// - /// Message to print - public void Log(string message) - { - Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); - } - - /// - /// Gets a span of the specified memory location, containing shader code. - /// - /// GPU virtual address of the data - /// Minimum size that the returned span may have - /// Span of the memory location - public override ReadOnlySpan GetCode(ulong address, int minimumSize) - { - return MemoryMarshal.Cast(_data.Span.Slice((int)address)); - } - - /// - /// Checks if a given memory address is mapped. - /// - /// GPU virtual address to be checked - /// True if the address is mapped, false otherwise - public bool MemoryMapped(ulong address) - { - return address < (ulong)_data.Length; - } - - /// - /// Queries Local Size X for compute shaders. - /// - /// Local Size X - public int QueryComputeLocalSizeX() - { - return _header.ComputeLocalSizeX; - } - - /// - /// Queries Local Size Y for compute shaders. - /// - /// Local Size Y - public int QueryComputeLocalSizeY() - { - return _header.ComputeLocalSizeY; - } - - /// - /// Queries Local Size Z for compute shaders. - /// - /// Local Size Z - public int QueryComputeLocalSizeZ() - { - return _header.ComputeLocalSizeZ; - } - - /// - /// Queries Local Memory size in bytes for compute shaders. - /// - /// Local Memory size in bytes - public int QueryComputeLocalMemorySize() - { - return _header.ComputeLocalMemorySize; - } - - /// - /// Queries Shared Memory size in bytes for compute shaders. - /// - /// Shared Memory size in bytes - public int QueryComputeSharedMemorySize() - { - return _header.ComputeSharedMemorySize; - } - - /// - /// Queries current primitive topology for geometry shaders. - /// - /// Current primitive topology - public InputTopology QueryPrimitiveTopology() - { - return _header.PrimitiveTopology; - } - - /// - /// Queries the tessellation evaluation shader primitive winding order. 
- /// - /// True if the primitive winding order is clockwise, false if counter-clockwise - public bool QueryTessCw() - { - return (_header.TessellationModePacked & 0x10) != 0; - } - - /// - /// Queries the tessellation evaluation shader abstract patch type. - /// - /// Abstract patch type - public TessPatchType QueryTessPatchType() - { - return (TessPatchType)(_header.TessellationModePacked & 3); - } - - /// - /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch. - /// - /// Spacing between tessellated vertices of the patch - public TessSpacing QueryTessSpacing() - { - return (TessSpacing)((_header.TessellationModePacked >> 2) & 3); - } - - /// - /// Gets the texture descriptor for a given texture on the pool. - /// - /// Index of the texture (this is the word offset of the handle in the constant buffer) - /// Constant buffer slot for the texture handle - /// Texture descriptor - public override Image.ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) - { - if (!_textureDescriptors.TryGetValue(handle, out GuestTextureDescriptor textureDescriptor)) - { - throw new ArgumentException(); - } - - return textureDescriptor; - } - - /// - /// Queries transform feedback enable state. - /// - /// True if the shader uses transform feedback, false otherwise - public bool QueryTransformFeedbackEnabled() - { - return _tfd != null; - } - - /// - /// Queries the varying locations that should be written to the transform feedback buffer. - /// - /// Index of the transform feedback buffer - /// Varying locations for the specified buffer - public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) - { - return _tfd[bufferIndex].VaryingLocations; - } - - /// - /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer. 
- /// - /// Index of the transform feedback buffer - /// Stride for the specified buffer - public int QueryTransformFeedbackStride(int bufferIndex) - { - return _tfd[bufferIndex].Stride; - } - - /// - /// Queries if host state forces early depth testing. - /// - /// True if early depth testing is forced - public bool QueryEarlyZForce() - { - return (_header.StateFlags & GuestGpuStateFlags.EarlyZForce) != 0; - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs similarity index 63% rename from Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs rename to Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs index efdbc3ebe..3b4c65f3d 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs +++ b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs @@ -7,26 +7,33 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Represents a program composed of one or more shader stages (for graphics shaders), /// or a single shader (for compute shaders). /// - class ShaderBundle : IDisposable + class CachedShaderProgram : IDisposable { /// /// Host shader program object. /// public IProgram HostProgram { get; } + /// + /// GPU state used to create this version of the shader. + /// + public ShaderSpecializationState SpecializationState { get; } + /// /// Compiled shader for each shader stage. /// - public ShaderCodeHolder[] Shaders { get; } + public CachedShaderStage[] Shaders { get; } /// /// Creates a new instance of the shader bundle. 
/// /// Host program with all the shader stages + /// GPU state used to create this version of the shader /// Shaders - public ShaderBundle(IProgram hostProgram, params ShaderCodeHolder[] shaders) + public CachedShaderProgram(IProgram hostProgram, ShaderSpecializationState specializationState, params CachedShaderStage[] shaders) { HostProgram = hostProgram; + SpecializationState = specializationState; Shaders = shaders; } @@ -36,11 +43,6 @@ namespace Ryujinx.Graphics.Gpu.Shader public void Dispose() { HostProgram.Dispose(); - - foreach (ShaderCodeHolder holder in Shaders) - { - holder?.HostShader?.Dispose(); - } } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs b/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs new file mode 100644 index 000000000..22b08dd5a --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/CachedShaderStage.cs @@ -0,0 +1,38 @@ +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Cached shader code for a single shader stage. + /// + class CachedShaderStage + { + /// + /// Shader program information. + /// + public ShaderProgramInfo Info { get; } + + /// + /// Maxwell binary shader code. + /// + public byte[] Code { get; } + + /// + /// Constant buffer 1 data accessed by the shader. + /// + public byte[] Cb1Data { get; } + + /// + /// Creates a new instance of the shader code holder. 
+ /// + /// Shader program information + /// Maxwell binary shader code + /// Constant buffer 1 data accessed by the shader + public CachedShaderStage(ShaderProgramInfo info, byte[] code, byte[] cb1Data) + { + Info = info; + Code = code; + Cb1Data = cb1Data; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs new file mode 100644 index 000000000..08154df32 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ComputeShaderCacheHashTable.cs @@ -0,0 +1,68 @@ +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Compute shader cache hash table. + /// + class ComputeShaderCacheHashTable + { + private readonly PartitionedHashTable _cache; + private readonly List _shaderPrograms; + + /// + /// Creates a new compute shader cache hash table. + /// + public ComputeShaderCacheHashTable() + { + _cache = new PartitionedHashTable(); + _shaderPrograms = new List(); + } + + /// + /// Adds a program to the cache. + /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + var specList = _cache.GetOrAdd(program.Shaders[0].Code, new ShaderSpecializationList()); + specList.Add(program); + _shaderPrograms.Add(program); + } + + /// + /// Tries to find a cached program. 
+ /// + /// GPU channel + /// Texture pool state + /// GPU virtual address of the compute shader + /// Cached host program for the given state, if found + /// Cached guest code, if any found + /// True if a cached host program was found, false otherwise + public bool TryFind( + GpuChannel channel, + GpuChannelPoolState poolState, + ulong gpuVa, + out CachedShaderProgram program, + out byte[] cachedGuestCode) + { + program = null; + ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa); + bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode); + return hasSpecList && specList.TryFindForCompute(channel, poolState, out program); + } + + /// + /// Gets all programs that have been added to the table. + /// + /// Programs added to the table + public IEnumerable GetPrograms() + { + foreach (var program in _shaderPrograms) + { + yield return program; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs new file mode 100644 index 000000000..5c5e41c69 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs @@ -0,0 +1,138 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using System; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Represents a background disk cache writer. + /// + class BackgroundDiskCacheWriter : IDisposable + { + /// + /// Possible operation to do on the . + /// + private enum CacheFileOperation + { + /// + /// Operation to add a shader to the cache. + /// + AddShader + } + + /// + /// Represents an operation to perform on the . + /// + private struct CacheFileOperationTask + { + /// + /// The type of operation to perform. + /// + public readonly CacheFileOperation Type; + + /// + /// The data associated to this operation or null. 
+ /// + public readonly object Data; + + public CacheFileOperationTask(CacheFileOperation type, object data) + { + Type = type; + Data = data; + } + } + + /// + /// Background shader cache write information. + /// + private struct AddShaderData + { + /// + /// Cached shader program. + /// + public readonly CachedShaderProgram Program; + + /// + /// Binary host code. + /// + public readonly byte[] HostCode; + + /// + /// Creates a new background shader cache write information. + /// + /// Cached shader program + /// Binary host code + public AddShaderData(CachedShaderProgram program, byte[] hostCode) + { + Program = program; + HostCode = hostCode; + } + } + + private readonly GpuContext _context; + private readonly DiskCacheHostStorage _hostStorage; + private readonly AsyncWorkQueue _fileWriterWorkerQueue; + + /// + /// Creates a new background disk cache writer. + /// + /// GPU context + /// Disk cache host storage + public BackgroundDiskCacheWriter(GpuContext context, DiskCacheHostStorage hostStorage) + { + _context = context; + _hostStorage = hostStorage; + _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "Gpu.BackgroundDiskCacheWriter"); + } + + /// + /// Processes a shader cache background operation. + /// + /// Task to process + private void ProcessTask(CacheFileOperationTask task) + { + switch (task.Type) + { + case CacheFileOperation.AddShader: + AddShaderData data = (AddShaderData)task.Data; + try + { + _hostStorage.AddShader(_context, data.Program, data.HostCode); + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {diskCacheLoadException.Message}"); + } + catch (IOException ioException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {ioException.Message}"); + } + break; + } + } + + /// + /// Adds a shader program to be cached in the background. 
+        /// </summary>
+        /// <param name="program">Shader program to cache</param>
+        /// <param name="hostCode">Host binary code of the program</param>
+        public void AddShader(CachedShaderProgram program, byte[] hostCode)
+        {
+            _fileWriterWorkerQueue.Add(new CacheFileOperationTask(CacheFileOperation.AddShader, new AddShaderData(program, hostCode)));
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                _fileWriterWorkerQueue.Dispose();
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs
new file mode 100644
index 000000000..50e37033e
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BinarySerializer.cs
@@ -0,0 +1,216 @@
+using System;
+using System.IO;
+using System.IO.Compression;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
+{
+    /// <summary>
+    /// Binary data serializer.
+    /// </summary>
+    struct BinarySerializer
+    {
+        private readonly Stream _stream;
+        private Stream _activeStream;
+
+        /// <summary>
+        /// Creates a new binary serializer.
+        /// </summary>
+        /// <param name="stream">Stream to read from or write into</param>
+        public BinarySerializer(Stream stream)
+        {
+            _stream = stream;
+            _activeStream = stream;
+        }
+
+        /// <summary>
+        /// Reads data from the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <exception cref="DiskCacheLoadException">Thrown if the stream ends before the whole value was read</exception>
+        public void Read<T>(ref T data) where T : unmanaged
+        {
+            ReadIntoBuffer(MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1)));
+        }
+
+        /// <summary>
+        /// Tries to read data from the stream.
+        /// </summary>
+        /// <remarks>
+        /// The remaining length is only checked when reading directly from the underlying stream.
+        /// While a compressed chunk is active the read is attempted unconditionally.
+        /// </remarks>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <returns>True if the read was successful, false otherwise</returns>
+        public bool TryRead<T>(ref T data) where T : unmanaged
+        {
+            // Length is unknown on compressed streams.
+            if (_activeStream == _stream)
+            {
+                int size = Unsafe.SizeOf<T>();
+                if (_activeStream.Length - _activeStream.Position < size)
+                {
+                    return false;
+                }
+            }
+
+            Read(ref data);
+            return true;
+        }
+
+        /// <summary>
+        /// Reads data prefixed with a magic and size from the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data read</param>
+        /// <param name="magic">Expected magic value, for validation</param>
+        /// <exception cref="DiskCacheLoadException">
+        /// Thrown if the magic does not match, the stored size is invalid, or the stream ends early
+        /// </exception>
+        public void ReadWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
+        {
+            uint actualMagic = 0;
+            int size = 0;
+            Read(ref actualMagic);
+            Read(ref size);
+
+            if (actualMagic != magic)
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidMagic);
+            }
+
+            // Structs are expected to expand but not shrink between versions.
+            // A negative size can only come from a corrupted file, and would make Slice below throw.
+            if (size < 0 || size > Unsafe.SizeOf<T>())
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidLength);
+            }
+
+            // Only the stored size is read, any remaining bytes of data keep their current value.
+            ReadIntoBuffer(MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1)).Slice(0, size));
+        }
+
+        /// <summary>
+        /// Fills a buffer with data from the active stream.
+        /// </summary>
+        /// <param name="buffer">Buffer to be completely filled</param>
+        /// <exception cref="DiskCacheLoadException">Thrown if the stream ends before the buffer is full</exception>
+        private void ReadIntoBuffer(Span<byte> buffer)
+        {
+            while (!buffer.IsEmpty)
+            {
+                int bytesRead = _activeStream.Read(buffer);
+
+                // A read of zero bytes means end of stream, looping again would never make progress.
+                if (bytesRead <= 0)
+                {
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                }
+
+                buffer = buffer.Slice(bytesRead);
+            }
+        }
+
+        /// <summary>
+        /// Writes data into the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data to be written</param>
+        public void Write<T>(ref T data) where T : unmanaged
+        {
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
+            _activeStream.Write(buffer);
+        }
+
+        /// <summary>
+        /// Writes data prefixed with a magic and size into the stream.
+        /// </summary>
+        /// <typeparam name="T">Type of the data</typeparam>
+        /// <param name="data">Data to write</param>
+        /// <param name="magic">Magic value to write</param>
+        public void WriteWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
+        {
+            int size = Unsafe.SizeOf<T>();
+            Write(ref magic);
+            Write(ref size);
+            Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));
+            _activeStream.Write(buffer);
+        }
+
+        /// <summary>
+        /// Indicates that all data that will be read from the stream has been compressed.
+        /// </summary>
+        public void BeginCompression()
+        {
+            CompressionAlgorithm algorithm = CompressionAlgorithm.None;
+            Read(ref algorithm);
+
+            if (algorithm == CompressionAlgorithm.Deflate)
+            {
+                _activeStream = new DeflateStream(_stream, CompressionMode.Decompress, true);
+            }
+        }
+
+        /// <summary>
+        /// Indicates that all data that will be written into the stream should be compressed.
+        /// </summary>
+        /// <param name="algorithm">Compression algorithm that should be used</param>
+        public void BeginCompression(CompressionAlgorithm algorithm)
+        {
+            Write(ref algorithm);
+
+            if (algorithm == CompressionAlgorithm.Deflate)
+            {
+                _activeStream = new DeflateStream(_stream, CompressionLevel.SmallestSize, true);
+            }
+        }
+
+        /// <summary>
+        /// Indicates the end of a compressed chunk.
+        /// </summary>
+        /// <remarks>
+        /// Any data written after this will not be compressed unless <see cref="BeginCompression(CompressionAlgorithm)"/> is called again.
+        /// Any data read after this will be assumed to be uncompressed unless <see cref="BeginCompression()"/> is called again.
+        /// </remarks>
+        public void EndCompression()
+        {
+            if (_activeStream != _stream)
+            {
+                _activeStream.Dispose();
+                _activeStream = _stream;
+            }
+        }
+
+        /// <summary>
+        /// Reads compressed data from the stream.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="data"/> must have the exact length of the uncompressed data,
+        /// otherwise decompression will fail.
+        /// </remarks>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="data">Buffer to write the uncompressed data into</param>
+        /// <exception cref="DiskCacheLoadException">
+        /// Thrown if the stream ends early or the stored compression algorithm is not known
+        /// </exception>
+        public static void ReadCompressed(Stream stream, Span<byte> data)
+        {
+            // ReadByte returns -1 at the end of the stream, which must not be mistaken for an algorithm value.
+            int algorithmValue = stream.ReadByte();
+            if (algorithmValue < 0)
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+            }
+
+            switch ((CompressionAlgorithm)algorithmValue)
+            {
+                case CompressionAlgorithm.None:
+                    ReadFully(stream, data);
+                    break;
+                case CompressionAlgorithm.Deflate:
+                    // The underlying stream is left open, it is owned by the caller.
+                    using (DeflateStream deflateStream = new DeflateStream(stream, CompressionMode.Decompress, true))
+                    {
+                        ReadFully(deflateStream, data);
+                    }
+                    break;
+                default:
+                    // Unknown algorithm byte, returning here would hand back an unfilled buffer as valid data.
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+            }
+        }
+
+        /// <summary>
+        /// Fills a buffer with data from a stream.
+        /// </summary>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="data">Buffer to be completely filled</param>
+        /// <exception cref="DiskCacheLoadException">Thrown if the stream ends before the buffer is full</exception>
+        private static void ReadFully(Stream stream, Span<byte> data)
+        {
+            while (!data.IsEmpty)
+            {
+                int bytesRead = stream.Read(data);
+
+                // A read of zero bytes means end of stream, looping again would never make progress.
+                if (bytesRead <= 0)
+                {
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                }
+
+                data = data.Slice(bytesRead);
+            }
+        }
+
+        /// <summary>
+        /// Compresses and writes the compressed data into the stream.
+ /// + /// Stream to write into + /// Data to compress + /// Compression algorithm to be used + public static void WriteCompressed(Stream stream, ReadOnlySpan data, CompressionAlgorithm algorithm) + { + stream.WriteByte((byte)algorithm); + + switch (algorithm) + { + case CompressionAlgorithm.None: + stream.Write(data); + break; + case CompressionAlgorithm.Deflate: + stream = new DeflateStream(stream, CompressionLevel.SmallestSize, true); + stream.Write(data); + stream.Dispose(); + break; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs new file mode 100644 index 000000000..a46e1ef76 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/CompressionAlgorithm.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Algorithm used to compress the cache. + /// + enum CompressionAlgorithm : byte + { + /// + /// No compression, the data is stored as-is. + /// + None, + + /// + /// Deflate compression (RFC 1951). + /// + Deflate + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs new file mode 100644 index 000000000..c8a9f7ff2 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheCommon.cs @@ -0,0 +1,57 @@ +using Ryujinx.Common.Logging; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Common disk cache utility methods. + /// + static class DiskCacheCommon + { + /// + /// Opens a file for read or write. 
+ /// + /// Base path of the file (should not include the file name) + /// Name of the file + /// Indicates if the file will be read or written + /// File stream + public static FileStream OpenFile(string basePath, string fileName, bool writable) + { + string fullPath = Path.Combine(basePath, fileName); + + FileMode mode; + FileAccess access; + + if (writable) + { + mode = FileMode.OpenOrCreate; + access = FileAccess.ReadWrite; + } + else + { + mode = FileMode.Open; + access = FileAccess.Read; + } + + try + { + return new FileStream(fullPath, mode, access, FileShare.Read); + } + catch (IOException ioException) + { + Logger.Error?.Print(LogClass.Gpu, $"Could not access file \"{fullPath}\". {ioException.Message}"); + + throw new DiskCacheLoadException(DiskCacheLoadResult.NoAccess); + } + } + + /// + /// Gets the compression algorithm that should be used when writing the disk cache. + /// + /// Compression algorithm + public static CompressionAlgorithm GetCompressionAlgorithm() + { + return CompressionAlgorithm.Deflate; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs new file mode 100644 index 000000000..b1c04eac0 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -0,0 +1,202 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Shader; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Represents a GPU state and memory accessor. 
+ /// + class DiskCacheGpuAccessor : GpuAccessorBase, IGpuAccessor + { + private readonly ReadOnlyMemory _data; + private readonly ReadOnlyMemory _cb1Data; + private readonly ShaderSpecializationState _oldSpecState; + private readonly ShaderSpecializationState _newSpecState; + private readonly int _stageIndex; + private ResourceCounts _resourceCounts; + + /// + /// Creates a new instance of the cached GPU state accessor for shader translation. + /// + /// GPU context + /// The data of the shader + /// The constant buffer 1 data of the shader + /// Shader specialization state of the cached shader + /// Shader specialization state of the recompiled shader + /// Shader stage index + public DiskCacheGpuAccessor( + GpuContext context, + ReadOnlyMemory data, + ReadOnlyMemory cb1Data, + ShaderSpecializationState oldSpecState, + ShaderSpecializationState newSpecState, + ResourceCounts counts, + int stageIndex) : base(context) + { + _data = data; + _cb1Data = cb1Data; + _oldSpecState = oldSpecState; + _newSpecState = newSpecState; + _stageIndex = stageIndex; + _resourceCounts = counts; + } + + /// + public uint ConstantBuffer1Read(int offset) + { + if (offset + sizeof(uint) > _cb1Data.Length) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.InvalidCb1DataLength); + } + + return MemoryMarshal.Cast(_cb1Data.Span.Slice(offset))[0]; + } + + /// + public void Log(string message) + { + Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); + } + + /// + public ReadOnlySpan GetCode(ulong address, int minimumSize) + { + return MemoryMarshal.Cast(_data.Span.Slice((int)address)); + } + + /// + public int QueryBindingConstantBuffer(int index) + { + return _resourceCounts.UniformBuffersCount++; + } + + /// + public int QueryBindingStorageBuffer(int index) + { + return _resourceCounts.StorageBuffersCount++; + } + + /// + public int QueryBindingTexture(int index) + { + return _resourceCounts.TexturesCount++; + } + + /// + public int QueryBindingImage(int index) + 
{ + return _resourceCounts.ImagesCount++; + } + + /// + public int QueryComputeLocalSizeX() => _oldSpecState.ComputeState.LocalSizeX; + + /// + public int QueryComputeLocalSizeY() => _oldSpecState.ComputeState.LocalSizeY; + + /// + public int QueryComputeLocalSizeZ() => _oldSpecState.ComputeState.LocalSizeZ; + + /// + public int QueryComputeLocalMemorySize() => _oldSpecState.ComputeState.LocalMemorySize; + + /// + public int QueryComputeSharedMemorySize() => _oldSpecState.ComputeState.SharedMemorySize; + + /// + public uint QueryConstantBufferUse() + { + _newSpecState.RecordConstantBufferUse(_stageIndex, _oldSpecState.ConstantBufferUse[_stageIndex]); + return _oldSpecState.ConstantBufferUse[_stageIndex]; + } + + /// + public InputTopology QueryPrimitiveTopology() + { + _newSpecState.RecordPrimitiveTopology(); + return ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode); + } + + /// + public bool QueryTessCw() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackCw(); + } + + /// + public TessPatchType QueryTessPatchType() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackPatchType(); + } + + /// + public TessSpacing QueryTessSpacing() + { + return _oldSpecState.GraphicsState.TessellationMode.UnpackSpacing(); + } + + /// + public TextureFormat QueryTextureFormat(int handle, int cbufSlot) + { + _newSpecState.RecordTextureFormat(_stageIndex, handle, cbufSlot); + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + return ConvertToTextureFormat(format, formatSrgb); + } + + /// + public SamplerType QuerySamplerType(int handle, int cbufSlot) + { + _newSpecState.RecordTextureSamplerType(_stageIndex, handle, cbufSlot); + return _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot).ConvertSamplerType(); + } + + /// + public bool QueryTextureCoordNormalized(int handle, int cbufSlot) + { + _newSpecState.RecordTextureCoordNormalized(_stageIndex, handle, 
cbufSlot); + return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + } + + /// + public bool QueryTransformFeedbackEnabled() + { + return _oldSpecState.TransformFeedbackDescriptors != null; + } + + /// + public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].AsSpan(); + } + + /// + public int QueryTransformFeedbackStride(int bufferIndex) + { + return _oldSpecState.TransformFeedbackDescriptors[bufferIndex].Stride; + } + + /// + public bool QueryEarlyZForce() + { + _newSpecState.RecordEarlyZForce(); + return _oldSpecState.GraphicsState.EarlyZForce; + } + + /// + public void RegisterTexture(int handle, int cbufSlot) + { + if (!_oldSpecState.TextureRegistered(_stageIndex, handle, cbufSlot)) + { + throw new DiskCacheLoadException(DiskCacheLoadResult.MissingTextureDescriptor); + } + + (uint format, bool formatSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot); + TextureTarget target = _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot); + bool coordNormalized = _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); + _newSpecState.RegisterTexture(_stageIndex, handle, cbufSlot, format, formatSrgb, target, coordNormalized); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs new file mode 100644 index 000000000..4e338094f --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs @@ -0,0 +1,459 @@ +using Ryujinx.Common; +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// On-disk shader cache storage for guest code. 
+ /// + class DiskCacheGuestStorage + { + private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24); + + private const ushort VersionMajor = 1; + private const ushort VersionMinor = 0; + private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor; + + private const string TocFileName = "guest.toc"; + private const string DataFileName = "guest.data"; + + private readonly string _basePath; + + /// + /// TOC (Table of contents) file header. + /// + private struct TocHeader + { + /// + /// Magic value, for validation and identification purposes. + /// + public uint Magic; + + /// + /// File format version. + /// + public uint Version; + + /// + /// Header padding. + /// + public uint Padding; + + /// + /// Number of modifications to the file, also the shaders count. + /// + public uint ModificationsCount; + + /// + /// Reserved space, to be used in the future. Write as zero. + /// + public ulong Reserved; + + /// + /// Reserved space, to be used in the future. Write as zero. + /// + public ulong Reserved2; + } + + /// + /// TOC (Table of contents) file entry. + /// + private struct TocEntry + { + /// + /// Offset of the data on the data file. + /// + public uint Offset; + + /// + /// Code size. + /// + public uint CodeSize; + + /// + /// Constant buffer 1 data size. + /// + public uint Cb1DataSize; + + /// + /// Hash of the code and constant buffer data. + /// + public uint Hash; + } + + /// + /// TOC (Table of contents) memory cache entry. + /// + private struct TocMemoryEntry + { + /// + /// Offset of the data on the data file. + /// + public uint Offset; + + /// + /// Code size. + /// + public uint CodeSize; + + /// + /// Constant buffer 1 data size. + /// + public uint Cb1DataSize; + + /// + /// Index of the shader on the cache. + /// + public readonly int Index; + + /// + /// Creates a new TOC memory entry. 
+ /// + /// Offset of the data on the data file + /// Code size + /// Constant buffer 1 data size + /// Index of the shader on the cache + public TocMemoryEntry(uint offset, uint codeSize, uint cb1DataSize, int index) + { + Offset = offset; + CodeSize = codeSize; + Cb1DataSize = cb1DataSize; + Index = index; + } + } + + private Dictionary> _toc; + private uint _tocModificationsCount; + + private (byte[], byte[])[] _cache; + + /// + /// Creates a new disk cache guest storage. + /// + /// Base path of the disk shader cache + public DiskCacheGuestStorage(string basePath) + { + _basePath = basePath; + } + + /// + /// Checks if the TOC (table of contents) file for the guest cache exists. + /// + /// True if the file exists, false otherwise + public bool TocFileExists() + { + return File.Exists(Path.Combine(_basePath, TocFileName)); + } + + /// + /// Checks if the data file for the guest cache exists. + /// + /// True if the file exists, false otherwise + public bool DataFileExists() + { + return File.Exists(Path.Combine(_basePath, DataFileName)); + } + + /// + /// Opens the guest cache TOC (table of contents) file. + /// + /// File stream + public Stream OpenTocFileStream() + { + return DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: false); + } + + /// + /// Opens the guest cache data file. + /// + /// File stream + public Stream OpenDataFileStream() + { + return DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: false); + } + + /// + /// Clear all content from the guest cache files. + /// + public void ClearCache() + { + using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true); + using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true); + + tocFileStream.SetLength(0); + dataFileStream.SetLength(0); + } + + /// + /// Loads the guest cache from file or memory cache. 
+        /// </summary>
+        /// <param name="tocFileStream">Guest TOC file stream</param>
+        /// <param name="dataFileStream">Guest data file stream</param>
+        /// <param name="index">Guest shader index</param>
+        /// <returns>Tuple with the guest code and constant buffer 1 data, respectively</returns>
+        /// <exception cref="DiskCacheLoadException">
+        /// Thrown if the index has no TOC entry, or the entry points outside of the data file
+        /// </exception>
+        public (byte[], byte[]) LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
+        {
+            int shadersCount = GetShadersCountFromLength(tocFileStream.Length);
+
+            // The index comes from the host cache file. Without a matching TOC entry there is nothing to read.
+            if (index < 0 || index >= shadersCount)
+            {
+                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+            }
+
+            if (_cache == null || index >= _cache.Length)
+            {
+                // Resize creates the array when it is null, and keeps the already loaded entries otherwise.
+                Array.Resize(ref _cache, shadersCount);
+            }
+
+            (byte[] guestCode, byte[] cb1Data) = _cache[index];
+
+            if (guestCode == null || cb1Data == null)
+            {
+                BinarySerializer tocReader = new BinarySerializer(tocFileStream);
+                tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (long)index * Unsafe.SizeOf<TocEntry>(), SeekOrigin.Begin);
+
+                TocEntry entry = new TocEntry();
+                tocReader.Read(ref entry);
+
+                // Validate the entry before allocating, the constant buffer data is stored uncompressed
+                // so it must fit entirely inside of the data file.
+                if (entry.Offset >= (ulong)dataFileStream.Length ||
+                    (ulong)entry.Offset + entry.Cb1DataSize > (ulong)dataFileStream.Length)
+                {
+                    throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                }
+
+                guestCode = new byte[entry.CodeSize];
+                cb1Data = new byte[entry.Cb1DataSize];
+
+                dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);
+
+                // A single Read call is allowed to return fewer bytes than requested.
+                for (int offset = 0; offset < cb1Data.Length;)
+                {
+                    int bytesRead = dataFileStream.Read(cb1Data, offset, cb1Data.Length - offset);
+                    if (bytesRead <= 0)
+                    {
+                        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
+                    }
+
+                    offset += bytesRead;
+                }
+
+                BinarySerializer.ReadCompressed(dataFileStream, guestCode);
+
+                _cache[index] = (guestCode, cb1Data);
+            }
+
+            return (guestCode, cb1Data);
+        }
+
+        /// <summary>
+        /// Clears guest code memory cache, forcing future loads to be from file.
+        /// </summary>
+        public void ClearMemoryCache()
+        {
+            _cache = null;
+        }
+
+        /// <summary>
+        /// Calculates the guest shaders count from the TOC file length.
+        /// </summary>
+        /// <param name="length">TOC file length</param>
+        /// <returns>Shaders count</returns>
+        private static int GetShadersCountFromLength(long length)
+        {
+            return (int)((length - Unsafe.SizeOf<TocHeader>()) / Unsafe.SizeOf<TocEntry>());
+        }
+
+        /// <summary>
+        /// Adds a guest shader to the cache.
+        /// </summary>
+        /// <remarks>
+        /// If the shader is already on the cache, the existing index will be returned and nothing will be written.
+        /// </remarks>
+        /// <param name="data">Guest code</param>
+        /// <param name="cb1Data">Constant buffer 1 data accessed by the code</param>
+        /// <returns>Index of the shader on the cache</returns>
+        public int AddShader(ReadOnlySpan<byte> data, ReadOnlySpan<byte> cb1Data)
+        {
+            using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true);
+            using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true);
+
+            TocHeader header = new TocHeader();
+
+            LoadOrCreateToc(tocFileStream, ref header);
+
+            uint hash = CalcHash(data, cb1Data);
+
+            if (_toc.TryGetValue(hash, out var list))
+            {
+                foreach (var entry in list)
+                {
+                    // Entries sharing the hash but not the sizes can't be the same shader.
+                    if (data.Length != entry.CodeSize || cb1Data.Length != entry.Cb1DataSize)
+                    {
+                        continue;
+                    }
+
+                    dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);
+                    byte[] cachedCode = new byte[entry.CodeSize];
+                    byte[] cachedCb1Data = new byte[entry.Cb1DataSize];
+
+                    // An entry whose data is cut short by the end of the file can't match anything.
+                    if (!TryReadExact(dataFileStream, cachedCb1Data))
+                    {
+                        continue;
+                    }
+
+                    BinarySerializer.ReadCompressed(dataFileStream, cachedCode);
+
+                    if (data.SequenceEqual(cachedCode) && cb1Data.SequenceEqual(cachedCb1Data))
+                    {
+                        return entry.Index;
+                    }
+                }
+            }
+
+            return WriteNewEntry(tocFileStream, dataFileStream, ref header, data, cb1Data, hash);
+        }
+
+        /// <summary>
+        /// Tries to fill a buffer with data from a stream.
+        /// </summary>
+        /// <param name="stream">Stream to read from</param>
+        /// <param name="buffer">Buffer to be completely filled</param>
+        /// <returns>True if the buffer was filled, false if the stream ended first</returns>
+        private static bool TryReadExact(Stream stream, Span<byte> buffer)
+        {
+            while (!buffer.IsEmpty)
+            {
+                // A single Read call is allowed to return fewer bytes than requested.
+                int bytesRead = stream.Read(buffer);
+                if (bytesRead <= 0)
+                {
+                    return false;
+                }
+
+                buffer = buffer.Slice(bytesRead);
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Loads the guest cache TOC file, or create a new one if not present.
+        /// </summary>
+        /// <param name="tocFileStream">Guest TOC file stream</param>
+        /// <param name="header">Set to the TOC file header</param>
+        private void LoadOrCreateToc(Stream tocFileStream, ref TocHeader header)
+        {
+            BinarySerializer reader = new BinarySerializer(tocFileStream);
+
+            if (!reader.TryRead(ref header) || header.Magic != TocMagic || header.Version != VersionPacked)
+            {
+                CreateToc(tocFileStream, ref header);
+            }
+
+            // The memory TOC is reloaded when it is missing, or when the modifications count
+            // on the file header no longer matches the one recorded for the memory TOC.
+            if (_toc == null || header.ModificationsCount != _tocModificationsCount)
+            {
+                if (!LoadTocEntries(tocFileStream, ref reader))
+                {
+                    CreateToc(tocFileStream, ref header);
+
+                    // The file was just reset, entries loaded before the failure no longer exist on it.
+                    _toc.Clear();
+                }
+
+                _tocModificationsCount = header.ModificationsCount;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new guest cache TOC file.
+ /// + /// Guest TOC file stream + /// Set to the TOC header + private void CreateToc(Stream tocFileStream, ref TocHeader header) + { + BinarySerializer writer = new BinarySerializer(tocFileStream); + + header.Magic = TocMagic; + header.Version = VersionPacked; + header.Padding = 0; + header.ModificationsCount = 0; + header.Reserved = 0; + header.Reserved2 = 0; + + if (tocFileStream.Length > 0) + { + tocFileStream.Seek(0, SeekOrigin.Begin); + tocFileStream.SetLength(0); + } + + writer.Write(ref header); + } + + /// + /// Reads all the entries on the guest TOC file. + /// + /// Guest TOC file stream + /// TOC file reader + /// True if the operation was successful, false otherwise + private bool LoadTocEntries(Stream tocFileStream, ref BinarySerializer reader) + { + _toc = new Dictionary>(); + + TocEntry entry = new TocEntry(); + int index = 0; + + while (tocFileStream.Position < tocFileStream.Length) + { + if (!reader.TryRead(ref entry)) + { + return false; + } + + AddTocMemoryEntry(entry.Offset, entry.CodeSize, entry.Cb1DataSize, entry.Hash, index++); + } + + return true; + } + + /// + /// Writes a new guest code entry into the file. 
+ /// + /// TOC file stream + /// Data file stream + /// TOC header, to be updated with the new count + /// Guest code + /// Constant buffer 1 data accessed by the guest code + /// Code and constant buffer data hash + /// Entry index + private int WriteNewEntry( + Stream tocFileStream, + Stream dataFileStream, + ref TocHeader header, + ReadOnlySpan data, + ReadOnlySpan cb1Data, + uint hash) + { + BinarySerializer tocWriter = new BinarySerializer(tocFileStream); + + dataFileStream.Seek(0, SeekOrigin.End); + uint dataOffset = checked((uint)dataFileStream.Position); + uint codeSize = (uint)data.Length; + uint cb1DataSize = (uint)cb1Data.Length; + dataFileStream.Write(cb1Data); + BinarySerializer.WriteCompressed(dataFileStream, data, DiskCacheCommon.GetCompressionAlgorithm()); + + _tocModificationsCount = ++header.ModificationsCount; + tocFileStream.Seek(0, SeekOrigin.Begin); + tocWriter.Write(ref header); + + TocEntry entry = new TocEntry() + { + Offset = dataOffset, + CodeSize = codeSize, + Cb1DataSize = cb1DataSize, + Hash = hash + }; + + tocFileStream.Seek(0, SeekOrigin.End); + int index = (int)((tocFileStream.Position - Unsafe.SizeOf()) / Unsafe.SizeOf()); + + tocWriter.Write(ref entry); + + AddTocMemoryEntry(dataOffset, codeSize, cb1DataSize, hash, index); + + return index; + } + + /// + /// Adds an entry to the memory TOC cache. This can be used to avoid reading the TOC file all the time. + /// + /// Offset of the code and constant buffer data in the data file + /// Code size + /// Constant buffer 1 data size + /// Code and constant buffer data hash + /// Index of the data on the cache + private void AddTocMemoryEntry(uint dataOffset, uint codeSize, uint cb1DataSize, uint hash, int index) + { + if (!_toc.TryGetValue(hash, out var list)) + { + _toc.Add(hash, list = new List()); + } + + list.Add(new TocMemoryEntry(dataOffset, codeSize, cb1DataSize, index)); + } + + /// + /// Calculates the hash for a data pair. 
/// </summary>
/// <param name="data">Data 1</param>
/// <param name="data2">Data 2</param>
/// <returns>Hash of both data</returns>
private static uint CalcHash(ReadOnlySpan<byte> data, ReadOnlySpan<byte> data2)
{
    // Multiplication binds tighter than XOR: (hash(data2) * 23) ^ hash(data).
    return CalcHash(data2) * 23 ^ CalcHash(data);
}

/// <summary>
/// Calculates the hash for data.
/// </summary>
/// <param name="data">Data to be hashed</param>
/// <returns>Hash of the data</returns>
private static uint CalcHash(ReadOnlySpan<byte> data)
{
    // Only the low 32 bits of the low half of the 128-bit hash are kept.
    return (uint)XXHash128.ComputeHash(data).Low;
}
}
}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
new file mode 100644
index 000000000..0028e8796
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -0,0 +1,763 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// On-disk shader cache storage for host code.
    /// </summary>
    class DiskCacheHostStorage
    {
        // Four character codes, packed with the first character on the lowest byte.
        private const uint TocsMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'S' << 24);
        private const uint TochMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'H' << 24);
        private const uint ShdiMagic = (byte)'S' | ((byte)'H' << 8) | ((byte)'D' << 16) | ((byte)'I' << 24);
        private const uint BufdMagic = (byte)'B' | ((byte)'U' << 8) | ((byte)'F' << 16) | ((byte)'D' << 24);
        private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24);

        private const ushort FileFormatVersionMajor = 1;
        private const ushort FileFormatVersionMinor = 1;
        private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
        private const uint CodeGenVersion = 0;

        private const string SharedTocFileName = "shared.toc";
        private const string SharedDataFileName = "shared.data";

        private readonly string _basePath;

        /// <summary>
        /// Indicates if the disk cache is enabled, which is the case when a base path was supplied.
        /// </summary>
        public bool CacheEnabled => !string.IsNullOrEmpty(_basePath);

        /// <summary>
        /// TOC (Table of contents) file header.
        /// </summary>
        private struct TocHeader
        {
            /// <summary>
            /// Magic value, for validation and identification.
            /// </summary>
            public uint Magic;

            /// <summary>
            /// File format version.
            /// </summary>
            public uint FormatVersion;

            /// <summary>
            /// Generated shader code version.
            /// </summary>
            public uint CodeGenVersion;

            /// <summary>
            /// Header padding.
            /// </summary>
            public uint Padding;

            /// <summary>
            /// Reserved space, to be used in the future. Write as zero.
            /// </summary>
            public ulong Reserved;

            /// <summary>
            /// Reserved space, to be used in the future. Write as zero.
            /// </summary>
            public ulong Reserved2;
        }

        /// <summary>
        /// Offset and size pair.
        /// </summary>
        private struct OffsetAndSize
        {
            /// <summary>
            /// Offset.
            /// </summary>
            public ulong Offset;

            /// <summary>
            /// Size.
            /// </summary>
            public uint Size;
        }

        /// <summary>
        /// Per-stage data entry.
        /// </summary>
        private struct DataEntryPerStage
        {
            /// <summary>
            /// Index of the guest code on the guest code cache TOC file.
            /// </summary>
            public int GuestCodeIndex;
        }

        /// <summary>
        /// Per-program data entry.
        /// </summary>
        private struct DataEntry
        {
            /// <summary>
            /// Bit mask where each bit set is a used shader stage. Should be zero for compute shaders.
            /// </summary>
            public uint StagesBitMask;
        }

        /// <summary>
        /// Per-stage shader information, returned by the translator.
        /// </summary>
        private struct DataShaderInfo
        {
            /// <summary>
            /// Total constant buffers used.
            /// </summary>
            public ushort CBuffersCount;

            /// <summary>
            /// Total storage buffers used.
            /// </summary>
            public ushort SBuffersCount;

            /// <summary>
            /// Total textures used.
            /// </summary>
            public ushort TexturesCount;

            /// <summary>
            /// Total images used.
            /// </summary>
            public ushort ImagesCount;

            /// <summary>
            /// Shader stage.
            /// </summary>
            public ShaderStage Stage;

            /// <summary>
            /// Indicates if the shader accesses the Instance ID built-in variable.
            /// </summary>
            public bool UsesInstanceId;

            /// <summary>
            /// Indicates if the shader modifies the Layer built-in variable.
            /// </summary>
            public bool UsesRtLayer;

            /// <summary>
            /// Bit mask with the clip distances written on the vertex stage.
            /// </summary>
            public byte ClipDistancesWritten;

            /// <summary>
            /// Bit mask of the render target components written by the fragment stage.
            /// </summary>
            public int FragmentOutputMap;
        }

        private readonly DiskCacheGuestStorage _guestStorage;

        /// <summary>
        /// Creates a disk cache host storage.
        /// </summary>
        /// <param name="basePath">Base path of the shader cache</param>
        public DiskCacheHostStorage(string basePath)
        {
            _basePath = basePath;
            _guestStorage = new DiskCacheGuestStorage(basePath);

            if (CacheEnabled)
            {
                Directory.CreateDirectory(basePath);
            }
        }

        /// <summary>
        /// Gets the total of host programs on the cache.
        /// </summary>
        /// <returns>Host programs count, or zero if the cache is disabled, missing or has no entries</returns>
        public int GetProgramCount()
        {
            // Without a base path there is no cache; Path.Combine would throw on null
            // and resolve against the working directory on an empty string.
            if (!CacheEnabled)
            {
                return 0;
            }

            string tocFilePath = Path.Combine(_basePath, SharedTocFileName);

            if (!File.Exists(tocFilePath))
            {
                return 0;
            }

            // Each program takes one 64-bit data offset after the header.
            // A file shorter than the header (e.g. truncated to zero by a cache clear) holds no programs,
            // so the result is clamped instead of going negative.
            long entriesLength = new FileInfo(tocFilePath).Length - Unsafe.SizeOf<TocHeader>();

            if (entriesLength <= 0)
            {
                return 0;
            }

            return (int)(entriesLength / sizeof(ulong));
        }

        /// <summary>
        /// Gets the name of the host program cache file, without extension.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <returns>Name of the file, without extension</returns>
        private static string GetHostFileName(GpuContext context)
        {
            string apiName = context.Capabilities.Api.ToString().ToLowerInvariant();
            string vendorName = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant());
            return $"{apiName}_{vendorName}";
        }

        /// <summary>
        /// Removes invalid path characters and spaces from a file name.
        /// </summary>
        /// <param name="fileName">File name</param>
        /// <returns>Filtered file name</returns>
        private static string RemoveInvalidCharacters(string fileName)
        {
            // Everything from the first space onwards is dropped, keeping only the first word.
            int indexOfSpace = fileName.IndexOf(' ');
            if (indexOfSpace >= 0)
            {
                fileName = fileName.Substring(0, indexOfSpace);
            }

            // Splitting on the invalid characters and joining the pieces removes them.
            return string.Concat(fileName.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries));
        }

        /// <summary>
        /// Gets the name of the TOC host file.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <returns>File name</returns>
        private static string GetHostTocFileName(GpuContext context)
        {
            return GetHostFileName(context) + ".toc";
        }

        /// <summary>
        /// Gets the name of the data host file.
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>File name</returns>
private static string GetHostDataFileName(GpuContext context)
{
    return GetHostFileName(context) + ".data";
}

/// <summary>
/// Checks if a disk cache exists for the current application.
/// </summary>
/// <returns>True if a disk cache exists, false otherwise</returns>
public bool CacheExists()
{
    // Without a base path there is no cache; Path.Combine would throw on null
    // and resolve against the working directory on an empty string.
    if (!CacheEnabled)
    {
        return false;
    }

    string tocFilePath = Path.Combine(_basePath, SharedTocFileName);
    string dataFilePath = Path.Combine(_basePath, SharedDataFileName);

    // All four files (shared TOC/data and guest TOC/data) are needed to load anything.
    return File.Exists(tocFilePath) &&
           File.Exists(dataFilePath) &&
           _guestStorage.TocFileExists() &&
           _guestStorage.DataFileExists();
}

/// <summary>
/// Loads all shaders from the cache.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="loader">Parallel disk cache loader</param>
/// <exception cref="DiskCacheLoadException">The shared cache files are corrupted or have an incompatible version</exception>
public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader)
{
    if (!CacheExists())
    {
        return;
    }

    // Host files are opened lazily by ReadHostCode and disposed on the finally block below.
    Stream hostTocFileStream = null;
    Stream hostDataFileStream = null;

    try
    {
        using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: false);
        using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: false);

        using var guestTocFileStream = _guestStorage.OpenTocFileStream();
        using var guestDataFileStream = _guestStorage.OpenDataFileStream();

        BinarySerializer tocReader = new BinarySerializer(tocFileStream);
        BinarySerializer dataReader = new BinarySerializer(dataFileStream);

        TocHeader header = new TocHeader();

        if (!tocReader.TryRead(ref header) || header.Magic != TocsMagic)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        if (header.FormatVersion != FileFormatVersionPacked)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.IncompatibleVersion);
        }

        // Host binaries are only used when they were produced by the current code generator version.
        bool loadHostCache = header.CodeGenVersion == CodeGenVersion;

        int programIndex = 0;

        DataEntry entry = new DataEntry();

        while (tocFileStream.Position < tocFileStream.Length && loader.Active)
        {
            ulong dataOffset = 0;
            tocReader.Read(ref dataOffset);

            if ((ulong)dataOffset >= (ulong)dataFileStream.Length)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            dataFileStream.Seek((long)dataOffset, SeekOrigin.Begin);

            dataReader.BeginCompression();
            dataReader.Read(ref entry);
            uint stagesBitMask = entry.StagesBitMask;

            // Only the low 6 bits are valid stage bits.
            if ((stagesBitMask & ~0x3fu) != 0)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            // Compute programs are stored with an empty mask; treat them as a single stage at index 0.
            bool isCompute = stagesBitMask == 0;
            if (isCompute)
            {
                stagesBitMask = 1;
            }

            CachedShaderStage[] shaders = new CachedShaderStage[isCompute ? 1 : Constants.ShaderStages + 1];

            DataEntryPerStage stageEntry = new DataEntryPerStage();

            while (stagesBitMask != 0)
            {
                int stageIndex = BitOperations.TrailingZeroCount(stagesBitMask);

                dataReader.Read(ref stageEntry);

                // For graphics programs, the stage at index 0 has no program info stored.
                ShaderProgramInfo info = stageIndex != 0 || isCompute ? ReadShaderProgramInfo(ref dataReader) : null;

                (byte[] guestCode, byte[] cb1Data) = _guestStorage.LoadShader(
                    guestTocFileStream,
                    guestDataFileStream,
                    stageEntry.GuestCodeIndex);

                shaders[stageIndex] = new CachedShaderStage(info, guestCode, cb1Data);

                stagesBitMask &= ~(1u << stageIndex);
            }

            ShaderSpecializationState specState = ShaderSpecializationState.Read(ref dataReader);
            dataReader.EndCompression();

            if (loadHostCache)
            {
                byte[] hostCode = ReadHostCode(context, ref hostTocFileStream, ref hostDataFileStream, programIndex);

                if (hostCode != null)
                {
                    bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null;
                    int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1;
                    IProgram hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, new ShaderInfo(fragmentOutputMap));

                    CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);

                    loader.QueueHostProgram(program, hostProgram, programIndex, isCompute);
                }
                else
                {
                    // No host binary for this program: stop trying for the remaining ones as well.
                    loadHostCache = false;
                }
            }

            if (!loadHostCache)
            {
                loader.QueueGuestProgram(shaders, specState, programIndex, isCompute);
            }

            loader.CheckCompilation();
            programIndex++;
        }
    }
    finally
    {
        _guestStorage.ClearMemoryCache();

        hostTocFileStream?.Dispose();
        hostDataFileStream?.Dispose();
    }
}

/// <summary>
/// Reads the host code for a given shader, if existent.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="tocFileStream">Host TOC file stream, initialized if needed</param>
/// <param name="dataFileStream">Host data file stream, initialized if needed</param>
/// <param name="programIndex">Index of the program on the cache</param>
/// <returns>Host binary code, or null if not found</returns>
/// <exception cref="DiskCacheLoadException">The TOC entry points outside of the host data file</exception>
private byte[] ReadHostCode(GpuContext context, ref Stream tocFileStream, ref Stream dataFileStream, int programIndex)
{
    if (tocFileStream == null && dataFileStream == null)
    {
        string tocFilePath = Path.Combine(_basePath, GetHostTocFileName(context));
        string dataFilePath = Path.Combine(_basePath, GetHostDataFileName(context));

        if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath))
        {
            return null;
        }

        tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false);
        dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: false);
    }

    // Position of this program's entry on the TOC file (computed as long to avoid int overflow).
    long offset = Unsafe.SizeOf<TocHeader>() + (long)programIndex * Unsafe.SizeOf<OffsetAndSize>();
    if (offset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
    {
        return null;
    }

    // Note: the TOC offset is deliberately not compared against the data file length. They index
    // different files, so that comparison could report corruption on a perfectly valid cache.
    // The data offset read from the entry is validated below instead.
    tocFileStream.Seek(offset, SeekOrigin.Begin);

    BinarySerializer tocReader = new BinarySerializer(tocFileStream);

    OffsetAndSize offsetAndSize = new OffsetAndSize();
    tocReader.Read(ref offsetAndSize);

    if (offsetAndSize.Offset >= (ulong)dataFileStream.Length)
    {
        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
    }

    dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);

    // The size stored on the entry is the uncompressed size of the host binary.
    byte[] hostCode = new byte[offsetAndSize.Size];

    BinarySerializer.ReadCompressed(dataFileStream, hostCode);

    return hostCode;
}

/// <summary>
/// Gets output streams for the disk cache, for faster batch writing.
/// </summary>
/// <param name="context">The GPU context, used to determine the host disk cache</param>
/// <returns>A collection of disk cache output streams</returns>
public DiskCacheOutputStreams GetOutputStreams(GpuContext context)
{
    FileStream tocFileStream = null;
    FileStream dataFileStream = null;
    FileStream hostTocFileStream = null;
    FileStream hostDataFileStream = null;

    try
    {
        tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
        dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

        hostTocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
        hostDataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

        return new DiskCacheOutputStreams(tocFileStream, dataFileStream, hostTocFileStream, hostDataFileStream);
    }
    catch
    {
        // If any file fails to open, release the ones that were already opened before propagating.
        tocFileStream?.Dispose();
        dataFileStream?.Dispose();
        hostTocFileStream?.Dispose();
        hostDataFileStream?.Dispose();

        throw;
    }
}

/// <summary>
/// Adds a shader to the cache.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="program">Cached program</param>
/// <param name="hostCode">Optional host binary code</param>
/// <param name="streams">Output streams to use</param>
public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlySpan<byte> hostCode, DiskCacheOutputStreams streams = null)
{
    // Build the mask of used stages. Compute stages are left out, so compute programs get a zero mask.
    uint stagesBitMask = 0;

    for (int index = 0; index < program.Shaders.Length; index++)
    {
        var shader = program.Shaders[index];
        if (shader == null || (shader.Info != null && shader.Info.Stage == ShaderStage.Compute))
        {
            continue;
        }

        stagesBitMask |= 1u << index;
    }

    Stream tocFileStream = null;
    Stream dataFileStream = null;

    try
    {
        tocFileStream = streams != null ? streams.TocFileStream : DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
        dataFileStream = streams != null ? streams.DataFileStream : DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

        if (tocFileStream.Length == 0)
        {
            TocHeader header = new TocHeader();
            CreateToc(tocFileStream, ref header, TocsMagic, CodeGenVersion);
        }

        tocFileStream.Seek(0, SeekOrigin.End);
        dataFileStream.Seek(0, SeekOrigin.End);

        BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
        BinarySerializer dataWriter = new BinarySerializer(dataFileStream);

        // The shared TOC is just a list of offsets into the shared data file.
        ulong dataOffset = (ulong)dataFileStream.Position;
        tocWriter.Write(ref dataOffset);

        DataEntry entry = new DataEntry();

        entry.StagesBitMask = stagesBitMask;

        dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
        dataWriter.Write(ref entry);

        DataEntryPerStage stageEntry = new DataEntryPerStage();

        for (int index = 0; index < program.Shaders.Length; index++)
        {
            var shader = program.Shaders[index];
            if (shader == null)
            {
                continue;
            }

            stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);

            dataWriter.Write(ref stageEntry);

            WriteShaderProgramInfo(ref dataWriter, shader.Info);
        }

        program.SpecializationState.Write(ref dataWriter);
        dataWriter.EndCompression();
    }
    finally
    {
        // Streams opened here are owned by this call and are released even if a write fails.
        // Caller supplied streams are left open.
        if (streams == null)
        {
            tocFileStream?.Dispose();
            dataFileStream?.Dispose();
        }
    }

    if (hostCode.IsEmpty)
    {
        return;
    }

    WriteHostCode(context, hostCode, -1, streams);
}

/// <summary>
/// Clears all content from the guest cache files.
/// </summary>
public void ClearGuestCache()
{
    _guestStorage.ClearCache();
}

/// <summary>
/// Clears all content from the shared cache files.
/// </summary>
public void ClearSharedCache()
{
    using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
    using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

    // The files are truncated rather than deleted.
    tocFileStream.SetLength(0);
    dataFileStream.SetLength(0);
}

/// <summary>
/// Deletes all content from the host cache files.
/// </summary>
/// <param name="context">GPU context</param>
public void ClearHostCache(GpuContext context)
{
    using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
    using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

    // The files are truncated rather than deleted.
    tocFileStream.SetLength(0);
    dataFileStream.SetLength(0);
}

/// <summary>
/// Adds a host binary shader to the host cache.
/// </summary>
/// <remarks>
/// This only modifies the host cache. The shader must already exist in the other caches.
/// This method should only be used for rebuilding the host cache after a clear.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="hostCode">Host binary code</param>
/// <param name="programIndex">Index of the program in the cache</param>
public void AddHostShader(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex)
{
    WriteHostCode(context, hostCode, programIndex);
}

/// <summary>
/// Writes the host binary code on the host cache.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="hostCode">Host binary code</param>
/// <param name="programIndex">Index of the program in the cache, or -1 to append a new entry at the end of the TOC</param>
/// <param name="streams">Output streams to use</param>
private void WriteHostCode(GpuContext context, ReadOnlySpan<byte> hostCode, int programIndex, DiskCacheOutputStreams streams = null)
{
    Stream tocFileStream = null;
    Stream dataFileStream = null;

    try
    {
        tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
        dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

        if (tocFileStream.Length == 0)
        {
            TocHeader header = new TocHeader();
            CreateToc(tocFileStream, ref header, TochMagic, 0);
        }

        if (programIndex == -1)
        {
            tocFileStream.Seek(0, SeekOrigin.End);
        }
        else
        {
            // Entries have a fixed size, so the entry for a given program can be addressed directly.
            tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + (programIndex * Unsafe.SizeOf<OffsetAndSize>()), SeekOrigin.Begin);
        }

        // The binary itself is always appended to the data file.
        dataFileStream.Seek(0, SeekOrigin.End);

        BinarySerializer tocWriter = new BinarySerializer(tocFileStream);

        OffsetAndSize offsetAndSize = new OffsetAndSize();
        offsetAndSize.Offset = (ulong)dataFileStream.Position;
        offsetAndSize.Size = (uint)hostCode.Length;
        tocWriter.Write(ref offsetAndSize);

        BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());
    }
    finally
    {
        // Streams opened here are owned by this call and are released even if a write fails.
        // Caller supplied streams are left open.
        if (streams == null)
        {
            tocFileStream?.Dispose();
            dataFileStream?.Dispose();
        }
    }
}

/// <summary>
/// Creates a TOC file for the host or shared cache.
/// </summary>
/// <param name="tocFileStream">TOC file stream</param>
/// <param name="header">Set to the TOC file header</param>
/// <param name="magic">Magic value to be written</param>
/// <param name="codegenVersion">Shader codegen version, only valid for the host file</param>
private void CreateToc(Stream tocFileStream, ref TocHeader header, uint magic, uint codegenVersion)
{
    BinarySerializer writer = new BinarySerializer(tocFileStream);

    header.Magic = magic;
    header.FormatVersion = FileFormatVersionPacked;
    header.CodeGenVersion = codegenVersion;
    header.Padding = 0;
    header.Reserved = 0;
    header.Reserved2 = 0;

    // Any existing content is discarded so that the header ends up at the start of an empty file.
    if (tocFileStream.Length > 0)
    {
        tocFileStream.Seek(0, SeekOrigin.Begin);
        tocFileStream.SetLength(0);
    }

    writer.Write(ref header);
}

/// <summary>
/// Reads the shader program info from the cache.
/// </summary>
/// <param name="dataReader">Cache data reader</param>
/// <returns>Shader program info</returns>
private static ShaderProgramInfo ReadShaderProgramInfo(ref BinarySerializer dataReader)
{
    // The fixed-size record comes first; it carries the element counts of the arrays that follow.
    DataShaderInfo shaderInfo = new DataShaderInfo();
    dataReader.ReadWithMagicAndSize(ref shaderInfo, ShdiMagic);

    var constantBuffers = new BufferDescriptor[shaderInfo.CBuffersCount];
    var storageBuffers = new BufferDescriptor[shaderInfo.SBuffersCount];
    var textureDescriptors = new TextureDescriptor[shaderInfo.TexturesCount];
    var imageDescriptors = new TextureDescriptor[shaderInfo.ImagesCount];

    // Descriptors are read in this order: constant buffers, storage buffers, textures, images.
    for (int i = 0; i < constantBuffers.Length; i++)
    {
        dataReader.ReadWithMagicAndSize(ref constantBuffers[i], BufdMagic);
    }

    for (int i = 0; i < storageBuffers.Length; i++)
    {
        dataReader.ReadWithMagicAndSize(ref storageBuffers[i], BufdMagic);
    }

    for (int i = 0; i < textureDescriptors.Length; i++)
    {
        dataReader.ReadWithMagicAndSize(ref textureDescriptors[i], TexdMagic);
    }

    for (int i = 0; i < imageDescriptors.Length; i++)
    {
        dataReader.ReadWithMagicAndSize(ref imageDescriptors[i], TexdMagic);
    }

    return new ShaderProgramInfo(
        constantBuffers,
        storageBuffers,
        textureDescriptors,
        imageDescriptors,
        shaderInfo.Stage,
        shaderInfo.UsesInstanceId,
        shaderInfo.UsesRtLayer,
        shaderInfo.ClipDistancesWritten,
        shaderInfo.FragmentOutputMap);
}

/// <summary>
/// Writes the shader program info into the cache.
/// </summary>
/// <param name="dataWriter">Cache data writer</param>
/// <param name="info">Program info</param>
private static void WriteShaderProgramInfo(ref BinarySerializer dataWriter, ShaderProgramInfo info)
{
    // Stages without program info have nothing written for them.
    if (info == null)
    {
        return;
    }

    DataShaderInfo dataInfo = new DataShaderInfo();

    dataInfo.CBuffersCount = (ushort)info.CBuffers.Count;
    dataInfo.SBuffersCount = (ushort)info.SBuffers.Count;
    dataInfo.TexturesCount = (ushort)info.Textures.Count;
    dataInfo.ImagesCount = (ushort)info.Images.Count;
    dataInfo.Stage = info.Stage;
    dataInfo.UsesInstanceId = info.UsesInstanceId;
    dataInfo.UsesRtLayer = info.UsesRtLayer;
    dataInfo.ClipDistancesWritten = info.ClipDistancesWritten;
    dataInfo.FragmentOutputMap = info.FragmentOutputMap;

    dataWriter.WriteWithMagicAndSize(ref dataInfo, ShdiMagic);

    // Descriptors are written in this order: constant buffers, storage buffers, textures, images.
    // Each element is copied to a local because it is passed by reference to the writer.
    for (int index = 0; index < info.CBuffers.Count; index++)
    {
        var entry = info.CBuffers[index];
        dataWriter.WriteWithMagicAndSize(ref entry, BufdMagic);
    }

    for (int index = 0; index < info.SBuffers.Count; index++)
    {
        var entry = info.SBuffers[index];
        dataWriter.WriteWithMagicAndSize(ref entry, BufdMagic);
    }

    for (int index = 0; index < info.Textures.Count; index++)
    {
        var entry = info.Textures[index];
        dataWriter.WriteWithMagicAndSize(ref entry, TexdMagic);
    }

    for (int index = 0; index < info.Images.Count; index++)
    {
        var entry = info.Images[index];
        dataWriter.WriteWithMagicAndSize(ref entry, TexdMagic);
    }
}
}
}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
new file mode 100644
index 000000000..d6e23302c
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadException.cs
@@ -0,0 +1,48 @@
using System;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Disk cache load exception.
    /// </summary>
    class DiskCacheLoadException : Exception
    {
        /// <summary>
        /// Result of the cache load operation.
        /// </summary>
        public DiskCacheLoadResult Result { get; }

        /// <summary>
        /// Creates a new instance of the disk cache load exception.
        /// </summary>
        public DiskCacheLoadException()
        {
        }

        /// <summary>
        /// Creates a new instance of the disk cache load exception.
        /// </summary>
        /// <param name="message">Exception message</param>
        public DiskCacheLoadException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates a new instance of the disk cache load exception.
        /// </summary>
        /// <param name="message">Exception message</param>
        /// <param name="inner">Inner exception</param>
        public DiskCacheLoadException(string message, Exception inner) : base(message, inner)
        {
        }

        /// <summary>
        /// Creates a new instance of the disk cache load exception.
        /// </summary>
        /// <param name="result">Result code</param>
        public DiskCacheLoadException(DiskCacheLoadResult result) : base(result.GetMessage())
        {
            Result = result;
        }
    }
}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
new file mode 100644
index 000000000..b3ffa4a73
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheLoadResult.cs
@@ -0,0 +1,72 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Result of a shader cache load operation.
    /// </summary>
    enum DiskCacheLoadResult
    {
        /// <summary>
        /// No error.
        /// </summary>
        Success,

        /// <summary>
        /// File can't be accessed.
        /// </summary>
        NoAccess,

        /// <summary>
        /// The constant buffer 1 data length is too low for the translation of the guest shader.
        /// </summary>
        InvalidCb1DataLength,

        /// <summary>
        /// The cache is missing the descriptor of a texture used by the shader.
        /// </summary>
        MissingTextureDescriptor,

        /// <summary>
        /// File is corrupted.
        /// </summary>
        FileCorruptedGeneric,

        /// <summary>
        /// File is corrupted, detected by magic value check.
        /// </summary>
        FileCorruptedInvalidMagic,

        /// <summary>
        /// File is corrupted, detected by length check.
        /// </summary>
        FileCorruptedInvalidLength,

        /// <summary>
        /// File might be valid, but is incompatible with the current emulator version.
        /// </summary>
        IncompatibleVersion
    }

    static class DiskCacheLoadResultExtensions
    {
        /// <summary>
        /// Gets an error message from a result code.
        /// </summary>
        /// <param name="result">Result code</param>
        /// <returns>Error message</returns>
        public static string GetMessage(this DiskCacheLoadResult result)
        {
            return result switch
            {
                DiskCacheLoadResult.Success => "No error.",
                DiskCacheLoadResult.NoAccess => "Could not access the cache file.",
                DiskCacheLoadResult.InvalidCb1DataLength => "Constant buffer 1 data length is too low.",
                DiskCacheLoadResult.MissingTextureDescriptor => "Texture descriptor missing from the cache file.",
                DiskCacheLoadResult.FileCorruptedGeneric => "The cache file is corrupted.",
                DiskCacheLoadResult.FileCorruptedInvalidMagic => "Magic check failed, the cache file is corrupted.",
                DiskCacheLoadResult.FileCorruptedInvalidLength => "Length check failed, the cache file is corrupted.",
                DiskCacheLoadResult.IncompatibleVersion => "The version of the disk cache is not compatible with this version of the emulator.",
                _ => "Unknown error."
            };
        }
    }
}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
new file mode 100644
index 000000000..1e0df2647
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheOutputStreams.cs
@@ -0,0 +1,57 @@
using System;
using System.IO;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Output streams for the disk shader cache.
    /// </summary>
    class DiskCacheOutputStreams : IDisposable
    {
        /// <summary>
        /// Shared table of contents (TOC) file stream.
        /// </summary>
        public readonly FileStream TocFileStream;

        /// <summary>
        /// Shared data file stream.
        /// </summary>
        public readonly FileStream DataFileStream;

        /// <summary>
        /// Host table of contents (TOC) file stream.
        /// </summary>
        public readonly FileStream HostTocFileStream;

        /// <summary>
        /// Host data file stream.
        /// </summary>
        public readonly FileStream HostDataFileStream;

        /// <summary>
        /// Creates a new instance of a disk cache output stream container.
        /// </summary>
        /// <param name="tocFileStream">Stream for the shared table of contents file</param>
        /// <param name="dataFileStream">Stream for the shared data file</param>
        /// <param name="hostTocFileStream">Stream for the host table of contents file</param>
        /// <param name="hostDataFileStream">Stream for the host data file</param>
        public DiskCacheOutputStreams(FileStream tocFileStream, FileStream dataFileStream, FileStream hostTocFileStream, FileStream hostDataFileStream)
        {
            TocFileStream = tocFileStream;
            DataFileStream = dataFileStream;
            HostTocFileStream = hostTocFileStream;
            HostDataFileStream = hostDataFileStream;
        }

        /// <summary>
        /// Disposes the output file streams.
        /// </summary>
        public void Dispose()
        {
            TocFileStream.Dispose();
            DataFileStream.Dispose();
            HostTocFileStream.Dispose();
            HostDataFileStream.Dispose();
        }
    }
}
diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
new file mode 100644
index 000000000..af7579d5d
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
@@ -0,0 +1,672 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    class ParallelDiskCacheLoader
    {
        // Number of translation worker threads; also used as the bound of the translation queue.
        private const int ThreadCount = 8;

        private readonly GpuContext _context;
        private readonly ShaderCacheHashTable _graphicsCache;
        private readonly ComputeShaderCacheHashTable _computeCache;
        private readonly DiskCacheHostStorage _hostStorage;
        private readonly CancellationToken _cancellationToken;

        // Invoked with the cache state plus two counts (reported as compiled and total shaders).
        private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;

        /// <summary>
        /// Indicates if the cache should be loaded.
/// </summary>
public bool Active => !_cancellationToken.IsCancellationRequested;

// Set when the on-disk cache files need to be rebuilt after loading.
private bool _needsHostRegen;

/// <summary>
/// Number of shaders that failed to compile from the cache.
/// </summary>
public int ErrorCount { get; private set; }

/// <summary>
/// Program validation entry.
/// </summary>
private struct ProgramEntry
{
    /// <summary>
    /// Cached shader program.
    /// </summary>
    public readonly CachedShaderProgram CachedProgram;

    /// <summary>
    /// Host program.
    /// </summary>
    public readonly IProgram HostProgram;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// Indicates if the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// Indicates if the program is a host binary shader.
    /// </summary>
    public readonly bool IsBinary;

    /// <summary>
    /// Creates a new program validation entry.
    /// </summary>
    /// <param name="cachedProgram">Cached shader program</param>
    /// <param name="hostProgram">Host program</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">Indicates if the program is a compute shader</param>
    /// <param name="isBinary">Indicates if the program is a host binary shader</param>
    public ProgramEntry(
        CachedShaderProgram cachedProgram,
        IProgram hostProgram,
        int programIndex,
        bool isCompute,
        bool isBinary)
    {
        CachedProgram = cachedProgram;
        HostProgram = hostProgram;
        ProgramIndex = programIndex;
        IsCompute = isCompute;
        IsBinary = isBinary;
    }
}

/// <summary>
/// Translated shader compilation entry.
/// </summary>
private struct ProgramCompilation
{
    /// <summary>
    /// Translated shader stages.
    /// </summary>
    public readonly ShaderProgram[] TranslatedStages;

    /// <summary>
    /// Cached shaders.
    /// </summary>
    public readonly CachedShaderStage[] Shaders;

    /// <summary>
    /// Specialization state.
    /// </summary>
    public readonly ShaderSpecializationState SpecializationState;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// Indicates if the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// Creates a new translated shader compilation entry.
    /// </summary>
    /// <param name="translatedStages">Translated shader stages</param>
    /// <param name="shaders">Cached shaders</param>
    /// <param name="specState">Specialization state</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">Indicates if the program is a compute shader</param>
    public ProgramCompilation(
        ShaderProgram[] translatedStages,
        CachedShaderStage[] shaders,
        ShaderSpecializationState specState,
        int programIndex,
        bool isCompute)
    {
        TranslatedStages = translatedStages;
        Shaders = shaders;
        SpecializationState = specState;
        ProgramIndex = programIndex;
        IsCompute = isCompute;
    }
}

/// <summary>
/// Program translation entry.
/// </summary>
private struct AsyncProgramTranslation
{
    /// <summary>
    /// Cached shader stages.
    /// </summary>
    public readonly CachedShaderStage[] Shaders;

    /// <summary>
    /// Specialization state.
    /// </summary>
    public readonly ShaderSpecializationState SpecializationState;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// Indicates if the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// Creates a new program translation entry.
    /// </summary>
    /// <param name="shaders">Cached shader stages</param>
    /// <param name="specState">Specialization state</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">Indicates if the program is a compute shader</param>
    public AsyncProgramTranslation(
        CachedShaderStage[] shaders,
        ShaderSpecializationState specState,
        int programIndex,
        bool isCompute)
    {
        Shaders = shaders;
        SpecializationState = specState;
        ProgramIndex = programIndex;
        IsCompute = isCompute;
    }
}

// Host programs waiting for their link status to be checked.
private readonly Queue<ProgramEntry> _validationQueue;

// Translated programs waiting to be compiled on the host.
private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;

// Guest programs waiting to be translated by the worker threads.
private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;

// Successfully loaded programs, keyed by their program index.
private readonly SortedList<int, CachedShaderProgram> _programList;

private int _backendParallelCompileThreads;
private int _compiledCount;
private int _totalCount;

/// <summary>
/// Creates a new parallel disk cache loader.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="graphicsCache">Graphics shader cache</param>
/// <param name="computeCache">Compute shader cache</param>
/// <param name="hostStorage">Disk cache host storage</param>
/// <param name="cancellationToken">Cancellation token</param>
/// <param name="stateChangeCallback">Function to be called when there is a state change, reporting state, compiled and total shaders count</param>
public ParallelDiskCacheLoader(
    GpuContext context,
    ShaderCacheHashTable graphicsCache,
    ComputeShaderCacheHashTable computeCache,
    DiskCacheHostStorage hostStorage,
    CancellationToken cancellationToken,
    Action<ShaderCacheState, int, int> stateChangeCallback)
{
    _context = context;
    _graphicsCache = graphicsCache;
    _computeCache = computeCache;
    _hostStorage = hostStorage;
    _cancellationToken = cancellationToken;
    _stateChangeCallback = stateChangeCallback;
    _validationQueue = new Queue<ProgramEntry>();
    _compilationQueue = new ConcurrentQueue<ProgramCompilation>();
    _asyncTranslationQueue = new BlockingCollection<AsyncProgramTranslation>(ThreadCount);
    _programList = new SortedList<int, CachedShaderProgram>();
    _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code.
}

/// <summary>
/// Loads all shaders from the cache.
/// </summary>
public void LoadShaders()
{
    Thread[] workThreads = new Thread[ThreadCount];

    for (int index = 0; index < ThreadCount; index++)
    {
        workThreads[index] = new Thread(ProcessAsyncQueue)
        {
            Name = $"Gpu.AsyncTranslationThread.{index}"
        };
    }

    int programCount = _hostStorage.GetProgramCount();

    _compiledCount = 0;
    _totalCount = programCount;

    _stateChangeCallback(ShaderCacheState.Start, 0, programCount);

    Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache...");

    for (int index = 0; index < ThreadCount; index++)
    {
        workThreads[index].Start(_cancellationToken);
    }

    try
    {
        _hostStorage.LoadShaders(_context, this);
    }
    catch (DiskCacheLoadException diskCacheLoadException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}");

        // If we can't even access the file, then we also can't rebuild.
        if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess)
        {
            _needsHostRegen = true;
        }
    }
    catch (InvalidDataException invalidDataException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}");
        _needsHostRegen = true;
    }
    catch (IOException ioException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}");
        _needsHostRegen = true;
    }
    finally
    {
        // Always mark the queue as complete, even when an exception that is not handled above
        // propagates, so that no further items can be added to it.
        _asyncTranslationQueue.CompleteAdding();
    }

    for (int index = 0; index < ThreadCount; index++)
    {
        workThreads[index].Join();
    }

    CheckCompilationBlocking();

    if (_needsHostRegen)
    {
        // Rebuild both shared and host cache files.
        // Rebuilding shared is required because the shader information returned by the translator
        // might have changed, and so we have to reconstruct the file with the new information.
        try
        {
            _hostStorage.ClearSharedCache();
            _hostStorage.ClearHostCache(_context);

            if (_programList.Count != 0)
            {
                Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders...");

                using var streams = _hostStorage.GetOutputStreams(_context);

                // Count what was actually written, as the loop stops early on cancellation.
                int rebuiltCount = 0;

                foreach (var kv in _programList)
                {
                    if (!Active)
                    {
                        break;
                    }

                    CachedShaderProgram program = kv.Value;
                    _hostStorage.AddShader(_context, program, program.HostProgram.GetBinary(), streams);
                    rebuiltCount++;
                }

                Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {rebuiltCount} shaders successfully.");
            }
            else
            {
                _hostStorage.ClearGuestCache();

                Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption.");
            }
        }
        catch (DiskCacheLoadException diskCacheLoadException)
        {
            Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}");
        }
        catch (IOException ioException)
        {
            Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. {ioException.Message}");
        }
    }

    Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");

    _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount);
}

/// <summary>
/// Enqueues a host program for compilation.
/// </summary>
/// <param name="cachedProgram">Cached program</param>
/// <param name="hostProgram">Host program to be compiled</param>
/// <param name="programIndex">Program index</param>
/// <param name="isCompute">Indicates if the program is a compute shader</param>
public void QueueHostProgram(CachedShaderProgram cachedProgram, IProgram hostProgram, int programIndex, bool isCompute)
{
    EnqueueForValidation(new ProgramEntry(cachedProgram, hostProgram, programIndex, isCompute, isBinary: true));
}

/// <summary>
/// Enqueues a guest program for compilation.
/// </summary>
/// <param name="shaders">Cached shader stages</param>
/// <param name="specState">Specialization state</param>
/// <param name="programIndex">Program index</param>
/// <param name="isCompute">Indicates if the program is a compute shader</param>
public void QueueGuestProgram(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
{
    _asyncTranslationQueue.Add(new AsyncProgramTranslation(shaders, specState, programIndex, isCompute));
}

/// <summary>
/// Check the state of programs that have already been compiled,
/// and add to the cache if the compilation was successful.
/// </summary>
public void CheckCompilation()
{
    ProcessCompilationQueue();

    // Process programs that already finished compiling.
    // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation.
    while (_validationQueue.TryPeek(out ProgramEntry entry))
    {
        ProgramLinkStatus result = entry.HostProgram.CheckProgramLink(false);

        if (result != ProgramLinkStatus.Incomplete)
        {
            ProcessCompiledProgram(ref entry, result);
            _validationQueue.Dequeue();
        }
        else
        {
            break;
        }
    }
}

/// <summary>
/// Waits until all programs finishes compiling, then adds the ones
/// with successful compilation to the cache.
+ /// + private void CheckCompilationBlocking() + { + ProcessCompilationQueue(); + + while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active) + { + ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + } + } + + /// + /// Process a compiled program result. + /// + /// Compiled program entry + /// Compilation result + /// For failed host compilations, indicates if a guest compilation should be done asynchronously + private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true) + { + if (result == ProgramLinkStatus.Success) + { + // Compilation successful, add to memory cache. + if (entry.IsCompute) + { + _computeCache.Add(entry.CachedProgram); + } + else + { + _graphicsCache.Add(entry.CachedProgram); + } + + if (!entry.IsBinary) + { + _needsHostRegen = true; + } + + _programList.Add(entry.ProgramIndex, entry.CachedProgram); + SignalCompiled(); + } + else if (entry.IsBinary) + { + // If this is a host binary and compilation failed, + // we still have a chance to recompile from the guest binary. + CachedShaderProgram program = entry.CachedProgram; + + if (asyncCompile) + { + QueueGuestProgram(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + } + else + { + RecompileFromGuestCode(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + ProcessCompilationQueue(); + } + } + else + { + // Failed to compile from both host and guest binary. + ErrorCount++; + SignalCompiled(); + } + } + + /// + /// Processes the queue of translated guest programs that should be compiled on the host. 
+ /// + private void ProcessCompilationQueue() + { + while (_compilationQueue.TryDequeue(out ProgramCompilation compilation) && Active) + { + ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length]; + + int fragmentOutputMap = -1; + + for (int index = 0; index < compilation.TranslatedStages.Length; index++) + { + ShaderProgram shader = compilation.TranslatedStages[index]; + shaderSources[index] = CreateShaderSource(shader); + + if (shader.Info.Stage == ShaderStage.Fragment) + { + fragmentOutputMap = shader.Info.FragmentOutputMap; + } + } + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, new ShaderInfo(fragmentOutputMap)); + CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders); + + EnqueueForValidation(new ProgramEntry(program, hostProgram, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); + } + } + + /// + /// Enqueues a program for validation, which will check if the program was compiled successfully. + /// + /// Program entry to be validated + private void EnqueueForValidation(ProgramEntry newEntry) + { + _validationQueue.Enqueue(newEntry); + + // Do not allow more than N shader compilations in flight, where N is the maximum number of threads + // the driver will be using for parallel compilation. + // Submitting more seems to cause the NVIDIA OpenGL driver to crash. + if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry)) + { + ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + } + } + + /// + /// Processes the queue of programs that should be translated from guest code. 
+ /// + /// Cancellation token + private void ProcessAsyncQueue(object state) + { + CancellationToken ct = (CancellationToken)state; + + try + { + foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct)) + { + RecompileFromGuestCode( + asyncCompilation.Shaders, + asyncCompilation.SpecializationState, + asyncCompilation.ProgramIndex, + asyncCompilation.IsCompute); + } + } + catch (OperationCanceledException) + { + } + } + + /// + /// Recompiles a program from guest code. + /// + /// Shader stages + /// Specialization state + /// Program index + /// Indicates if the program is a compute shader + private void RecompileFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + { + try + { + if (isCompute) + { + RecompileComputeFromGuestCode(shaders, specState, programIndex); + } + else + { + RecompileGraphicsFromGuestCode(shaders, specState, programIndex); + } + } + catch (DiskCacheLoadException diskCacheLoadException) + { + Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {diskCacheLoadException.Message}"); + + ErrorCount++; + SignalCompiled(); + } + } + + /// + /// Recompiles a graphics program from guest code. 
+ /// + /// Shader stages + /// Specialization state + /// Program index + private void RecompileGraphicsFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + { + ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.GraphicsState, specState.TransformFeedbackDescriptors); + ResourceCounts counts = new ResourceCounts(); + + TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; + TranslatorContext nextStage = null; + + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) + { + CachedShaderStage shader = shaders[stageIndex + 1]; + + if (shader != null) + { + byte[] guestCode = shader.Code; + byte[] cb1Data = shader.Cb1Data; + + DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, 0); + + if (nextStage != null) + { + currentStage.SetNextStage(nextStage); + } + + if (stageIndex == 0 && shaders[0] != null) + { + byte[] guestCodeA = shaders[0].Code; + byte[] cb1DataA = shaders[0].Cb1Data; + + DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0); + translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, DefaultFlags | TranslationFlags.VertexA, 0); + } + + translatorContexts[stageIndex + 1] = currentStage; + nextStage = currentStage; + } + } + + List translatedStages = new List(); + + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + TranslatorContext currentStage = translatorContexts[stageIndex + 1]; + + if (currentStage != null) + { + ShaderProgram program; + + byte[] guestCode = shaders[stageIndex + 1].Code; + byte[] cb1Data = shaders[stageIndex + 1].Cb1Data; + + if (stageIndex == 0 && shaders[0] != null) + { + program = 
currentStage.Translate(translatorContexts[0]); + + byte[] guestCodeA = shaders[0].Code; + byte[] cb1DataA = shaders[0].Cb1Data; + + shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA); + shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data); + } + else + { + program = currentStage.Translate(); + + shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data); + } + + if (program != null) + { + translatedStages.Add(program); + } + } + } + + _compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false)); + } + + /// + /// Recompiles a compute program from guest code. + /// + /// Shader stages + /// Specialization state + /// Program index + private void RecompileComputeFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + { + CachedShaderStage shader = shaders[0]; + ResourceCounts counts = new ResourceCounts(); + ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.ComputeState); + DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); + + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, 0); + + ShaderProgram program = translatorContext.Translate(); + + shaders[0] = new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data); + + _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true)); + } + + /// + /// Signals that compilation of a program has been finished successfully, + /// or that it failed and guest recompilation has also been attempted. 
+ /// + private void SignalCompiled() + { + _stateChangeCallback(ShaderCacheState.Loading, Interlocked.Increment(ref _compiledCount), _totalCount); // Atomic increment: this can be reached from the async translation threads as well as from the thread processing compiled programs. + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index a5c7575f2..192467b75 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -1,5 +1,5 @@ using Ryujinx.Common.Logging; -using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; using System; using System.Runtime.InteropServices; @@ -9,19 +9,12 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Represents a GPU state and memory accessor. /// - class GpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor + class GpuAccessor : GpuAccessorBase, IGpuAccessor { private readonly GpuChannel _channel; private readonly GpuAccessorState _state; private readonly int _stageIndex; private readonly bool _compute; - private readonly int _localSizeX; - private readonly int _localSizeY; - private readonly int _localSizeZ; - private readonly int _localMemorySize; - private readonly int _sharedMemorySize; - - public int Cb1DataSize { get; private set; } /// /// Creates a new instance of the GPU state accessor for graphics shader translation. 
@@ -43,43 +36,16 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU context /// GPU channel /// Current GPU state - /// Local group size X of the compute shader - /// Local group size Y of the compute shader - /// Local group size Z of the compute shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader - public GpuAccessor( - GpuContext context, - GpuChannel channel, - GpuAccessorState state, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) : base(context) + public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context) { _channel = channel; _state = state; _compute = true; - _localSizeX = localSizeX; - _localSizeY = localSizeY; - _localSizeZ = localSizeZ; - _localMemorySize = localMemorySize; - _sharedMemorySize = sharedMemorySize; } - /// - /// Reads data from the constant buffer 1. - /// - /// Offset in bytes to read from - /// Value at the given offset + /// public uint ConstantBuffer1Read(int offset) { - if (Cb1DataSize < offset + 4) - { - Cb1DataSize = offset + 4; - } - ulong baseAddress = _compute ? _channel.BufferManager.GetComputeUniformBufferAddress(1) : _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1); @@ -87,111 +53,115 @@ namespace Ryujinx.Graphics.Gpu.Shader return _channel.MemoryManager.Physical.Read(baseAddress + (ulong)offset); } - /// - /// Prints a log message. - /// - /// Message to print + /// public void Log(string message) { Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}"); } - /// - /// Gets a span of the specified memory location, containing shader code. 
- /// - /// GPU virtual address of the data - /// Minimum size that the returned span may have - /// Span of the memory location - public override ReadOnlySpan GetCode(ulong address, int minimumSize) + /// + public ReadOnlySpan GetCode(ulong address, int minimumSize) { int size = Math.Max(minimumSize, 0x1000 - (int)(address & 0xfff)); return MemoryMarshal.Cast(_channel.MemoryManager.GetSpan(address, size)); } - /// - /// Queries Local Size X for compute shaders. - /// - /// Local Size X - public int QueryComputeLocalSizeX() => _localSizeX; + /// + public int QueryBindingConstantBuffer(int index) + { + return _state.ResourceCounts.UniformBuffersCount++; + } - /// - /// Queries Local Size Y for compute shaders. - /// - /// Local Size Y - public int QueryComputeLocalSizeY() => _localSizeY; + /// + public int QueryBindingStorageBuffer(int index) + { + return _state.ResourceCounts.StorageBuffersCount++; + } - /// - /// Queries Local Size Z for compute shaders. - /// - /// Local Size Z - public int QueryComputeLocalSizeZ() => _localSizeZ; + /// + public int QueryBindingTexture(int index) + { + return _state.ResourceCounts.TexturesCount++; + } - /// - /// Queries Local Memory size in bytes for compute shaders. - /// - /// Local Memory size in bytes - public int QueryComputeLocalMemorySize() => _localMemorySize; + /// + public int QueryBindingImage(int index) + { + return _state.ResourceCounts.ImagesCount++; + } - /// - /// Queries Shared Memory size in bytes for compute shaders. - /// - /// Shared Memory size in bytes - public int QueryComputeSharedMemorySize() => _sharedMemorySize; + /// + public int QueryComputeLocalSizeX() => _state.ComputeState.LocalSizeX; - /// - /// Queries Constant Buffer usage information. 
- /// - /// A mask where each bit set indicates a bound constant buffer + /// + public int QueryComputeLocalSizeY() => _state.ComputeState.LocalSizeY; + + /// + public int QueryComputeLocalSizeZ() => _state.ComputeState.LocalSizeZ; + + /// + public int QueryComputeLocalMemorySize() => _state.ComputeState.LocalMemorySize; + + /// + public int QueryComputeSharedMemorySize() => _state.ComputeState.SharedMemorySize; + + /// public uint QueryConstantBufferUse() { - return _compute + uint useMask = _compute ? _channel.BufferManager.GetComputeUniformBufferUseMask() : _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex); + + _state.SpecializationState?.RecordConstantBufferUse(_stageIndex, useMask); + return useMask; } - /// - /// Queries current primitive topology for geometry shaders. - /// - /// Current primitive topology + /// public InputTopology QueryPrimitiveTopology() { - return _state.Topology switch - { - PrimitiveTopology.Points => InputTopology.Points, - PrimitiveTopology.Lines or - PrimitiveTopology.LineLoop or - PrimitiveTopology.LineStrip => InputTopology.Lines, - PrimitiveTopology.LinesAdjacency or - PrimitiveTopology.LineStripAdjacency => InputTopology.LinesAdjacency, - PrimitiveTopology.Triangles or - PrimitiveTopology.TriangleStrip or - PrimitiveTopology.TriangleFan => InputTopology.Triangles, - PrimitiveTopology.TrianglesAdjacency or - PrimitiveTopology.TriangleStripAdjacency => InputTopology.TrianglesAdjacency, - PrimitiveTopology.Patches => _state.TessellationMode.UnpackPatchType() == TessPatchType.Isolines - ? InputTopology.Lines - : InputTopology.Triangles, - _ => InputTopology.Points - }; + _state.SpecializationState?.RecordPrimitiveTopology(); + return ConvertToInputTopology(_state.GraphicsState.Topology, _state.GraphicsState.TessellationMode); } - /// - /// Queries the tessellation evaluation shader primitive winding order. 
- /// - True if the primitive winding order is clockwise, false if counter-clockwise - public bool QueryTessCw() => _state.TessellationMode.UnpackCw(); + /// + public bool QueryTessCw() + { + return _state.GraphicsState.TessellationMode.UnpackCw(); + } - /// - /// Queries the tessellation evaluation shader abstract patch type. - /// - /// Abstract patch type - public TessPatchType QueryTessPatchType() => _state.TessellationMode.UnpackPatchType(); + /// + public TessPatchType QueryTessPatchType() + { + return _state.GraphicsState.TessellationMode.UnpackPatchType(); + } - /// - /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch. - /// - /// Spacing between tessellated vertices of the patch - public TessSpacing QueryTessSpacing() => _state.TessellationMode.UnpackSpacing(); + /// + public TessSpacing QueryTessSpacing() + { + return _state.GraphicsState.TessellationMode.UnpackSpacing(); + } + + /// + public TextureFormat QueryTextureFormat(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureFormat(_stageIndex, handle, cbufSlot); + var descriptor = GetTextureDescriptor(handle, cbufSlot); + return ConvertToTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb()); + } + + /// + public SamplerType QuerySamplerType(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureSamplerType(_stageIndex, handle, cbufSlot); + return GetTextureDescriptor(handle, cbufSlot).UnpackTextureTarget().ConvertSamplerType(); + } + + /// + public bool QueryTextureCoordNormalized(int handle, int cbufSlot) + { + _state.SpecializationState?.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot); + return GetTextureDescriptor(handle, cbufSlot).UnpackTextureCoordNormalized(); + } /// /// Gets the texture descriptor for a given texture on the pool. 
@@ -199,65 +169,58 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Index of the texture (this is the word offset of the handle in the constant buffer) /// Constant buffer slot for the texture handle /// Texture descriptor - public override Image.ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) + private Image.TextureDescriptor GetTextureDescriptor(int handle, int cbufSlot) { if (_compute) { return _channel.TextureManager.GetComputeTextureDescriptor( - _state.TexturePoolGpuVa, - _state.TextureBufferIndex, - _state.TexturePoolMaximumId, + _state.PoolState.TexturePoolGpuVa, + _state.PoolState.TextureBufferIndex, + _state.PoolState.TexturePoolMaximumId, handle, cbufSlot); } else { return _channel.TextureManager.GetGraphicsTextureDescriptor( - _state.TexturePoolGpuVa, - _state.TextureBufferIndex, - _state.TexturePoolMaximumId, + _state.PoolState.TexturePoolGpuVa, + _state.PoolState.TextureBufferIndex, + _state.PoolState.TexturePoolMaximumId, _stageIndex, handle, cbufSlot); } } - /// - /// Queries transform feedback enable state. - /// - /// True if the shader uses transform feedback, false otherwise + /// public bool QueryTransformFeedbackEnabled() { return _state.TransformFeedbackDescriptors != null; } - /// - /// Queries the varying locations that should be written to the transform feedback buffer. - /// - /// Index of the transform feedback buffer - /// Varying locations for the specified buffer + /// public ReadOnlySpan QueryTransformFeedbackVaryingLocations(int bufferIndex) { - return _state.TransformFeedbackDescriptors[bufferIndex].VaryingLocations; + return _state.TransformFeedbackDescriptors[bufferIndex].AsSpan(); } - /// - /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer. 
- /// - /// Index of the transform feedback buffer - /// Stride for the specified buffer + /// public int QueryTransformFeedbackStride(int bufferIndex) { return _state.TransformFeedbackDescriptors[bufferIndex].Stride; } - /// - /// Queries if host state forces early depth testing. - /// - /// True if early depth testing is forced + /// public bool QueryEarlyZForce() { - return _state.EarlyZForce; + _state.SpecializationState?.RecordEarlyZForce(); + return _state.GraphicsState.EarlyZForce; + } + + /// + public void RegisterTexture(int handle, int cbufSlot) + { + _state.SpecializationState?.RegisterTexture(_stageIndex, handle, cbufSlot, GetTextureDescriptor(handle, cbufSlot)); } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs similarity index 74% rename from Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs rename to Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 3a8ee67a9..5f9dd5880 100644 --- a/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -1,23 +1,26 @@ -using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; -using System; namespace Ryujinx.Graphics.Gpu.Shader { - abstract class TextureDescriptorCapableGpuAccessor : IGpuAccessor + /// + /// GPU accessor. + /// + class GpuAccessorBase { private readonly GpuContext _context; - public TextureDescriptorCapableGpuAccessor(GpuContext context) + /// + /// Creates a new GPU accessor. + /// + /// GPU context + public GpuAccessorBase(GpuContext context) { _context = context; } - public abstract ReadOnlySpan GetCode(ulong address, int minimumSize); - - public abstract ITextureDescriptor GetTextureDescriptor(int handle, int cbufSlot); - /// /// Queries host about the presence of the FrontFacing built-in variable bug. 
/// @@ -79,20 +82,14 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod; /// - /// Queries texture format information, for shaders using image load or store. + /// Converts a packed Maxwell texture format to the shader translator texture format. /// - /// - /// This only returns non-compressed color formats. - /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned. - /// - /// Texture handle - /// Constant buffer slot for the texture handle - /// Color format of the non-compressed texture - public TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) + /// Packed maxwell format + /// Indicates if the format is sRGB + /// Shader translator texture format + protected static TextureFormat ConvertToTextureFormat(uint format, bool formatSrgb) { - var descriptor = GetTextureDescriptor(handle, cbufSlot); - - if (!FormatTable.TryGetTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb(), out FormatInfo formatInfo)) + if (!FormatTable.TryGetTextureFormat(format, formatSrgb, out FormatInfo formatInfo)) { return TextureFormat.Unknown; } @@ -144,32 +141,31 @@ namespace Ryujinx.Graphics.Gpu.Shader } /// - /// Queries sampler type information. + /// Converts the Maxwell primitive topology to the shader translator topology. /// - /// Texture handle - /// Constant buffer slot for the texture handle - /// The sampler type value for the given handle - public SamplerType QuerySamplerType(int handle, int cbufSlot = -1) + /// Maxwell primitive topology + /// Maxwell tessellation mode + /// Shader translator topology + protected static InputTopology ConvertToInputTopology(PrimitiveTopology topology, TessMode tessellationMode) { - return GetTextureDescriptor(handle, cbufSlot).UnpackTextureTarget().ConvertSamplerType(); - } - - /// - /// Queries texture target information. 
- /// - /// Texture handle - /// Constant buffer slot for the texture handle - /// True if the texture is a rectangle texture, false otherwise - public bool QueryIsTextureRectangle(int handle, int cbufSlot = -1) - { - var descriptor = GetTextureDescriptor(handle, cbufSlot); - - TextureTarget target = descriptor.UnpackTextureTarget(); - - bool is2DTexture = target == TextureTarget.Texture2D || - target == TextureTarget.Texture2DRect; - - return !descriptor.UnpackTextureCoordNormalized() && is2DTexture; + return topology switch + { + PrimitiveTopology.Points => InputTopology.Points, + PrimitiveTopology.Lines or + PrimitiveTopology.LineLoop or + PrimitiveTopology.LineStrip => InputTopology.Lines, + PrimitiveTopology.LinesAdjacency or + PrimitiveTopology.LineStripAdjacency => InputTopology.LinesAdjacency, + PrimitiveTopology.Triangles or + PrimitiveTopology.TriangleStrip or + PrimitiveTopology.TriangleFan => InputTopology.Triangles, + PrimitiveTopology.TrianglesAdjacency or + PrimitiveTopology.TriangleStripAdjacency => InputTopology.TrianglesAdjacency, + PrimitiveTopology.Patches => tessellationMode.UnpackPatchType() == TessPatchType.Isolines + ? InputTopology.Lines + : InputTopology.Triangles, + _ => InputTopology.Points + }; } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs index 6818072b4..0e8e979c8 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorState.cs @@ -1,72 +1,61 @@ -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Gpu.Engine.Threed; - namespace Ryujinx.Graphics.Gpu.Shader { /// /// State used by the . /// - struct GpuAccessorState + class GpuAccessorState { /// - /// GPU virtual address of the texture pool. + /// GPU texture pool state. /// - public ulong TexturePoolGpuVa { get; } + public readonly GpuChannelPoolState PoolState; /// - /// Maximum ID of the texture pool. + /// GPU compute state, for compute shaders. 
/// - public int TexturePoolMaximumId { get; } + public readonly GpuChannelComputeState ComputeState; /// - /// Constant buffer slot where the texture handles are located. + /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders. /// - public int TextureBufferIndex { get; } + public readonly GpuChannelGraphicsState GraphicsState; /// - /// Early Z force enable. + /// Shader specialization state (shared by all stages). /// - public bool EarlyZForce { get; } - - /// - /// Primitive topology of current draw. - /// - public PrimitiveTopology Topology { get; } - - /// - /// Tessellation mode. - /// - public TessMode TessellationMode { get; } + public readonly ShaderSpecializationState SpecializationState; /// /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null. /// - public TransformFeedbackDescriptor[] TransformFeedbackDescriptors { get; set; } + public readonly TransformFeedbackDescriptor[] TransformFeedbackDescriptors; /// - /// Creates a new instance of the GPU accessor state. + /// Shader resource counts (shared by all stages). /// - /// GPU virtual address of the texture pool - /// Maximum ID of the texture pool - /// Constant buffer slot where the texture handles are located - /// Early Z force enable - /// Primitive topology - /// Tessellation mode + public readonly ResourceCounts ResourceCounts; + + /// + /// Creates a new GPU accessor state. + /// + /// GPU texture pool state + /// GPU compute state, for compute shaders + /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders + /// Shader specialization state (shared by all stages) + /// Transform feedback information, if the shader uses transform feedback. 
Otherwise, should be null public GpuAccessorState( - ulong texturePoolGpuVa, - int texturePoolMaximumId, - int textureBufferIndex, - bool earlyZForce, - PrimitiveTopology topology, - TessMode tessellationMode) + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + GpuChannelGraphicsState graphicsState, + ShaderSpecializationState specializationState, + TransformFeedbackDescriptor[] transformFeedbackDescriptors = null) { - TexturePoolGpuVa = texturePoolGpuVa; - TexturePoolMaximumId = texturePoolMaximumId; - TextureBufferIndex = textureBufferIndex; - EarlyZForce = earlyZForce; - Topology = topology; - TessellationMode = tessellationMode; - TransformFeedbackDescriptors = null; + PoolState = poolState; + GraphicsState = graphicsState; + ComputeState = computeState; + SpecializationState = specializationState; + TransformFeedbackDescriptors = transformFeedbackDescriptors; + ResourceCounts = new ResourceCounts(); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs new file mode 100644 index 000000000..89a3db712 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs @@ -0,0 +1,57 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelComputeState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Local group size X of the compute shader. + /// + public readonly int LocalSizeX; + + /// + /// Local group size Y of the compute shader. + /// + public readonly int LocalSizeY; + + /// + /// Local group size Z of the compute shader. + /// + public readonly int LocalSizeZ; + + /// + /// Local memory size of the compute shader. + /// + public readonly int LocalMemorySize; + + /// + /// Shared memory size of the compute shader. + /// + public readonly int SharedMemorySize; + + /// + /// Creates a new GPU compute state. 
+ /// + /// Local group size X of the compute shader + /// Local group size Y of the compute shader + /// Local group size Z of the compute shader + /// Local memory size of the compute shader + /// Shared memory size of the compute shader + public GpuChannelComputeState( + int localSizeX, + int localSizeY, + int localSizeZ, + int localMemorySize, + int sharedMemorySize) + { + LocalSizeX = localSizeX; + LocalSizeY = localSizeY; + LocalSizeZ = localSizeZ; + LocalMemorySize = localMemorySize; + SharedMemorySize = sharedMemorySize; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs new file mode 100644 index 000000000..5eb31db69 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs @@ -0,0 +1,41 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelGraphicsState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Early Z force enable. + /// + public readonly bool EarlyZForce; + + /// + /// Primitive topology of current draw. + /// + public readonly PrimitiveTopology Topology; + + /// + /// Tessellation mode. + /// + public readonly TessMode TessellationMode; + + /// + /// Creates a new GPU graphics state. 
+ /// + /// Early Z force enable + /// Primitive topology + /// Tessellation mode + public GpuChannelGraphicsState(bool earlyZForce, PrimitiveTopology topology, TessMode tessellationMode) + { + EarlyZForce = earlyZForce; + Topology = topology; + TessellationMode = tessellationMode; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs new file mode 100644 index 000000000..0b36227ac --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelPoolState.cs @@ -0,0 +1,36 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// State used by the . + /// + struct GpuChannelPoolState + { + /// + /// GPU virtual address of the texture pool. + /// + public readonly ulong TexturePoolGpuVa; + + /// + /// Maximum ID of the texture pool. + /// + public readonly int TexturePoolMaximumId; + + /// + /// Constant buffer slot where the texture handles are located. + /// + public readonly int TextureBufferIndex; + + /// + /// Creates a new GPU texture pool state. + /// + /// GPU virtual address of the texture pool + /// Maximum ID of the texture pool + /// Constant buffer slot where the texture handles are located + public GpuChannelPoolState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex) + { + TexturePoolGpuVa = texturePoolGpuVa; + TexturePoolMaximumId = texturePoolMaximumId; + TextureBufferIndex = textureBufferIndex; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs new file mode 100644 index 000000000..584eefdc6 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/HashState.cs @@ -0,0 +1,113 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// State of a hash calculation. + /// + struct HashState + { + // This is using a slightly modified implementation of FastHash64. 
+ // Reference: https://github.com/ztanml/fast-hash/blob/master/fasthash.c + private const ulong M = 0x880355f21e6d1965UL; + private ulong _hash; + private int _start; + + /// + /// One shot hash calculation for a given data. + /// + /// Data to be hashed + /// Hash of the given data + public static uint CalcHash(ReadOnlySpan data) + { + HashState state = new HashState(); + + state.Initialize(); + state.Continue(data); + return state.Finalize(data); + } + + /// + /// Initializes the hash state. + /// + public void Initialize() + { + _hash = 23; + } + + /// + /// Calculates the hash of the given data. + /// + /// + /// The full data must be passed on . + /// If this is not the first time the method is called, then must start with the data passed on the last call. + /// If a smaller slice of the data was already hashed before, only the additional data will be hashed. + /// This can be used for additive hashing of data in chunks. + /// + /// Data to be hashed + public void Continue(ReadOnlySpan data) + { + ulong h = _hash; + + ReadOnlySpan dataAsUlong = MemoryMarshal.Cast(data.Slice(_start)); + + for (int i = 0; i < dataAsUlong.Length; i++) + { + ulong value = dataAsUlong[i]; + + h ^= Mix(value); + h *= M; + } + + _hash = h; + _start = data.Length & ~7; + } + + /// + /// Performs the hash finalization step, and returns the calculated hash. + /// + /// + /// The full data must be passed on . + /// must start with the data passed on the last call to . + /// No internal state is changed, so one can still continue hashing data with + /// after calling this method. 
+ /// + /// Data to be hashed + /// Hash of all the data hashed with this + public uint Finalize(ReadOnlySpan data) + { + ulong h = _hash; + + int remainder = data.Length & 7; + if (remainder != 0) + { + ulong v = 0; + + for (int i = data.Length - remainder; i < data.Length; i++) + { + v |= (ulong)data[i] << ((i - remainder) * 8); + } + + h ^= Mix(v); + h *= M; + } + + h = Mix(h); + return (uint)(h - (h >> 32)); + } + + /// + /// Hash mix function. + /// + /// Hash to mix + /// Mixed hash + private static ulong Mix(ulong h) + { + h ^= h >> 23; + h *= 0x2127599bf4325c37UL; + h ^= h >> 47; + return h; + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs new file mode 100644 index 000000000..c982cd9f6 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/IDataAccessor.cs @@ -0,0 +1,27 @@ +using System; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Data accessor, used by to access data of unknown length. + /// + /// + /// This will be used to access chunks of data and try finding a match on the table. + /// This is necessary because the data size is assumed to be unknown, and so the + /// hash table must try to "guess" the size of the data based on the entries on the table. + /// + public interface IDataAccessor + { + /// + /// Gets a span of shader code at the specified offset, with at most the specified size. + /// + /// + /// This might return a span smaller than the requested if there's + /// no more code available. 
+ /// + /// Offset in shader code + /// Size in bytes + /// Shader code span + ReadOnlySpan GetSpan(int offset, int length); + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs new file mode 100644 index 000000000..6a563c16f --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionHashTable.cs @@ -0,0 +1,452 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Partitioned hash table. + /// + /// Hash table entry type + class PartitionHashTable + { + /// + /// Hash table entry. + /// + private struct Entry + { + /// + /// Hash bytes of . + /// + public readonly uint Hash; + + /// + /// If this entry is only a sub-region of , this indicates the size in bytes + /// of that region. Otherwise, it should be zero. + /// + public readonly int OwnSize; + + /// + /// Data used to compute the hash for this entry. + /// + /// + /// To avoid additional allocations, this might be an instance of the full entry data, + /// and only a sub-region of it might be actually used by this entry. Such sub-region + /// has its size indicated by in this case. + /// + public readonly byte[] Data; + + /// + /// Item associated with this entry. + /// + public T Item; + + /// + /// Indicates if the entry is partial, which means that this entry is only for a sub-region of the data. + /// + /// + /// Partial entries have no items associated with them. They just indicate that the data might be present on + /// the table, and one must keep looking for the full entry on other tables of larger data size. + /// + public bool IsPartial => OwnSize != 0; + + /// + /// Creates a new partial hash table entry. 
+ /// + /// Hash of the data + /// Full data + /// Size of the sub-region of data that belongs to this entry + public Entry(uint hash, byte[] ownerData, int ownSize) + { + Hash = hash; + OwnSize = ownSize; + Data = ownerData; + Item = default; + } + + /// + /// Creates a new full hash table entry. + /// + /// Hash of the data + /// Data + /// Item associated with this entry + public Entry(uint hash, byte[] data, T item) + { + Hash = hash; + OwnSize = 0; + Data = data; + Item = item; + } + + /// + /// Gets the data for this entry, either full or partial. + /// + /// Data sub-region + public ReadOnlySpan GetData() + { + if (OwnSize != 0) + { + return new ReadOnlySpan(Data).Slice(0, OwnSize); + } + + return Data; + } + } + + /// + /// Hash table bucket. + /// + private struct Bucket + { + /// + /// Inline entry, to avoid allocations for the common single entry case. + /// + public Entry InlineEntry; + + /// + /// List of additional entries for the not-so-common multiple entries case. + /// + public List MoreEntries; + } + + private Bucket[] _buckets; + private int _count; + + /// + /// Total amount of entries on the hash table. + /// + public int Count => _count; + + /// + /// Creates a new instance of the partitioned hash table. + /// + public PartitionHashTable() + { + _buckets = Array.Empty(); + } + + /// + /// Gets an item on the table, or adds a new one if not present. + /// + /// Data + /// Hash of the data + /// Item to be added if not found + /// Existing item if found, or if not found + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out T existingItem)) + { + return existingItem; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return item; + } + + /// + /// Adds an item to the hash table. 
+ /// + /// Data + /// Hash of the data + /// Item to be added + /// True if the item was added, false due to an item associated with the data already being on the table + public bool Add(byte[] data, uint dataHash, T item) + { + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, data, item); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// + /// Adds a partial entry to the hash table. + /// + /// Full data + /// Size of the sub-region of used by the partial entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, int ownSize) + { + ReadOnlySpan data = new ReadOnlySpan(ownerData).Slice(0, ownSize); + + return AddPartial(ownerData, HashState.CalcHash(data), ownSize); + } + + /// + /// Adds a partial entry to the hash table. + /// + /// Full data + /// Hash of the data sub-region + /// Size of the sub-region of used by the partial entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, uint dataHash, int ownSize) + { + ReadOnlySpan data = new ReadOnlySpan(ownerData).Slice(0, ownSize); + + if (TryFindItem(dataHash, data, out _)) + { + return false; + } + + Entry entry = new Entry(dataHash, ownerData, ownSize); + + AddToBucket(dataHash, ref entry); + + return true; + } + + /// + /// Adds entry with a given hash to the table. + /// + /// Hash of the entry + /// Entry + private void AddToBucket(uint dataHash, ref Entry entry) + { + int pow2Count = GetPow2Count(++_count); + if (pow2Count != _buckets.Length) + { + Rebuild(pow2Count); + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + AddToBucket(ref bucket, ref entry); + } + + /// + /// Adds an entry to a bucket. 
+ /// + /// Bucket to add the entry into + /// Entry to be added + private void AddToBucket(ref Bucket bucket, ref Entry entry) + { + if (bucket.InlineEntry.Data == null) + { + bucket.InlineEntry = entry; + } + else + { + (bucket.MoreEntries ??= new List()).Add(entry); + } + } + + /// + /// Creates partial entries on a new hash table for all existing full entries. + /// + /// + /// This should be called every time a new hash table is created, and there are hash + /// tables with data sizes that are higher than that of the new table. + /// This will then fill the new hash table with "partial" entries of full entries + /// on the hash tables with higher size. + /// + /// New hash table + /// Size of the data on the new hash table + public void FillPartials(PartitionHashTable newTable, int newEntrySize) + { + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + ref Entry inlineEntry = ref bucket.InlineEntry; + + if (inlineEntry.Data != null) + { + if (!inlineEntry.IsPartial) + { + newTable.AddPartial(inlineEntry.Data, newEntrySize); + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.IsPartial) + { + continue; + } + + newTable.AddPartial(entry.Data, newEntrySize); + } + } + } + } + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Hash of + /// Data to find + /// Item associated with the data + /// True if an item was found, false otherwise + private bool TryFindItem(uint dataHash, ReadOnlySpan data, out T item) + { + if (_count == 0) + { + item = default; + return false; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(data)) + { + item = bucket.InlineEntry.Item; + return true; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(data)) + { + item = entry.Item; + return true; + } + } + } + } + + item = default; + return false; + } + + /// + /// Indicates the result of a hash table lookup. + /// + public enum SearchResult + { + /// + /// No entry was found, the search must continue on hash tables of lower size. + /// + NotFound, + + /// + /// A partial entry was found, the search must continue on hash tables of higher size. + /// + FoundPartial, + + /// + /// A full entry was found, the search was concluded and the item can be retrieved. + /// + FoundFull + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Data accessor + /// Size of the hash table data + /// The item on the table, if found, otherwise unmodified + /// The data on the table, if found, otherwise unmodified + /// Table lookup result + public SearchResult TryFindItem(ref SmartDataAccessor dataAccessor, int size, ref T item, ref byte[] data) + { + if (_count == 0) + { + return SearchResult.NotFound; + } + + ReadOnlySpan dataSpan = dataAccessor.GetSpanAndHash(size, out uint dataHash); + + if (dataSpan.Length != size) + { + return SearchResult.NotFound; + } + + ref Bucket bucket = ref GetBucketForHash(dataHash); + + if (bucket.InlineEntry.Data != null) + { + if (bucket.InlineEntry.Hash == dataHash && bucket.InlineEntry.GetData().SequenceEqual(dataSpan)) + { + item = bucket.InlineEntry.Item; + data = bucket.InlineEntry.Data; + return bucket.InlineEntry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + if (entry.Hash == dataHash && entry.GetData().SequenceEqual(dataSpan)) + { + item = entry.Item; + data = entry.Data; + return entry.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull; + } + } + } + } + + return SearchResult.NotFound; + } + + /// + /// Rebuilds the table for a new count. + /// + /// New power of two count of the table + private void Rebuild(int newPow2Count) + { + Bucket[] newBuckets = new Bucket[newPow2Count]; + + uint mask = (uint)newPow2Count - 1; + + for (int i = 0; i < _buckets.Length; i++) + { + ref Bucket bucket = ref _buckets[i]; + + if (bucket.InlineEntry.Data != null) + { + AddToBucket(ref newBuckets[(int)(bucket.InlineEntry.Hash & mask)], ref bucket.InlineEntry); + + if (bucket.MoreEntries != null) + { + foreach (Entry entry in bucket.MoreEntries) + { + Entry entryCopy = entry; + AddToBucket(ref newBuckets[(int)(entry.Hash & mask)], ref entryCopy); + } + } + } + } + + _buckets = newBuckets; + } + + /// + /// Gets the bucket for a given hash. 
+ /// + /// Data hash + /// Bucket for the hash + private ref Bucket GetBucketForHash(uint hash) + { + int index = (int)(hash & (_buckets.Length - 1)); + + return ref _buckets[index]; + } + + /// + /// Gets a power of two count from a regular count. + /// + /// Count + /// Power of two count + private static int GetPow2Count(int count) + { + // This returns the largest power of two that is lower than or equal to count. + // This was done to optimize memory usage rather than performance. + return 1 << BitOperations.Log2((uint)count); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs new file mode 100644 index 000000000..4c9cc4d40 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/PartitionedHashTable.cs @@ -0,0 +1,244 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Partitioned hash table. + /// + /// + public class PartitionedHashTable + { + /// + /// Entry for a given data size. + /// + private struct SizeEntry + { + /// + /// Size for the data that will be stored on the hash table on this entry. + /// + public int Size { get; } + + /// + /// Number of entries on the hash table. + /// + public int TableCount => _table.Count; + + private readonly PartitionHashTable _table; + + /// + /// Creates an entry for a given size. + /// + /// Size of the data to be stored on this entry + public SizeEntry(int size) + { + Size = size; + _table = new PartitionHashTable(); + } + + /// + /// Gets an item for existing data, or adds a new one. + /// + /// Data associated with the item + /// Hash of + /// Item to be added + /// Existing item, or if not present + public T GetOrAdd(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.GetOrAdd(data, dataHash, item); + } + + /// + /// Adds a new item. 
+ /// + /// Data associated with the item + /// Hash of + /// Item to be added + /// True if added, false otherwise + public bool Add(byte[] data, uint dataHash, T item) + { + Debug.Assert(data.Length == Size); + return _table.Add(data, dataHash, item); + } + + /// + /// Adds a partial entry. + /// + /// Full entry data + /// Hash of the sub-region of the data that belongs to this entry + /// True if added, false otherwise + public bool AddPartial(byte[] ownerData, uint dataHash) + { + return _table.AddPartial(ownerData, dataHash, Size); + } + + /// + /// Fills a new hash table with "partials" of existing full entries of higher size. + /// + /// Entry with the new hash table + public void FillPartials(SizeEntry newEntry) + { + Debug.Assert(newEntry.Size < Size); + _table.FillPartials(newEntry._table, newEntry.Size); + } + + /// + /// Tries to find an item on the hash table. + /// + /// Data accessor + /// The item on the table, if found, otherwise unmodified + /// The data on the table, if found, otherwise unmodified + /// Table lookup result + public PartitionHashTable.SearchResult TryFindItem(ref SmartDataAccessor dataAccessor, ref T item, ref byte[] data) + { + return _table.TryFindItem(ref dataAccessor, Size, ref item, ref data); + } + } + + private readonly List _sizeTable; + + /// + /// Creates a new partitioned hash table. + /// + public PartitionedHashTable() + { + _sizeTable = new List(); + } + + /// + /// Adds a new item to the table. + /// + /// Data + /// Item associated with the data + public void Add(byte[] data, T item) + { + GetOrAdd(data, item); + } + + /// + /// Gets an existing item from the table, or adds a new one if not present. 
+ /// + /// Data + /// Item associated with the data + /// Existing item, or if not present + public T GetOrAdd(byte[] data, T item) + { + SizeEntry sizeEntry; + + int index = BinarySearch(_sizeTable, data.Length); + if (index < _sizeTable.Count && _sizeTable[index].Size == data.Length) + { + sizeEntry = _sizeTable[index]; + } + else + { + if (index < _sizeTable.Count && _sizeTable[index].Size < data.Length) + { + index++; + } + + sizeEntry = new SizeEntry(data.Length); + + _sizeTable.Insert(index, sizeEntry); + + for (int i = index + 1; i < _sizeTable.Count; i++) + { + _sizeTable[i].FillPartials(sizeEntry); + } + } + + HashState hashState = new HashState(); + hashState.Initialize(); + + for (int i = 0; i < index; i++) + { + ReadOnlySpan dataSlice = new ReadOnlySpan(data).Slice(0, _sizeTable[i].Size); + hashState.Continue(dataSlice); + _sizeTable[i].AddPartial(data, hashState.Finalize(dataSlice)); + } + + hashState.Continue(data); + return sizeEntry.GetOrAdd(data, hashState.Finalize(data), item); + } + + /// + /// Performs binary search on a list of hash tables, each one with a fixed data size. + /// + /// List of hash tables + /// Size to search for + /// Index of the hash table with the given size, or nearest one otherwise + private static int BinarySearch(List entries, int size) + { + int left = 0; + int middle = 0; + int right = entries.Count - 1; + + while (left <= right) + { + middle = left + ((right - left) >> 1); + + SizeEntry entry = entries[middle]; + + if (size == entry.Size) + { + break; + } + + if (size < entry.Size) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return middle; + } + + /// + /// Tries to find an item on the table. 
+ /// + /// Data accessor + /// Item, if found + /// Data, if found + /// True if the item was found on the table, false otherwise + public bool TryFindItem(IDataAccessor dataAccessor, out T item, out byte[] data) + { + SmartDataAccessor sda = new SmartDataAccessor(dataAccessor); + + item = default; + data = null; + + int left = 0; + int right = _sizeTable.Count; + + while (left != right) + { + int index = left + ((right - left) >> 1); + + PartitionHashTable.SearchResult result = _sizeTable[index].TryFindItem(ref sda, ref item, ref data); + + if (result == PartitionHashTable.SearchResult.FoundFull) + { + return true; + } + + if (result == PartitionHashTable.SearchResult.NotFound) + { + right = index; + } + else /* if (result == PartitionHashTable.SearchResult.FoundPartial) */ + { + left = index + 1; + } + } + + data = null; + return false; + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs new file mode 100644 index 000000000..0632add6c --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/HashTable/SmartDataAccessor.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader.HashTable +{ + /// + /// Smart data accessor that can cache data and hashes to avoid reading and re-hashing the same memory regions. + /// + ref struct SmartDataAccessor + { + private readonly IDataAccessor _dataAccessor; + private ReadOnlySpan _data; + private readonly SortedList _cachedHashes; + + /// + /// Creates a new smart data accessor. + /// + /// Data accessor + public SmartDataAccessor(IDataAccessor dataAccessor) + { + _dataAccessor = dataAccessor; + _data = ReadOnlySpan.Empty; + _cachedHashes = new SortedList(); + } + + /// + /// Gets a span of a given size. + /// + /// + /// The actual length of the span returned depends on the + /// and might be less than requested. 
+ /// + /// Size in bytes + /// Span with the requested size + public ReadOnlySpan GetSpan(int length) + { + if (_data.Length < length) + { + _data = _dataAccessor.GetSpan(0, length); + } + else if (_data.Length > length) + { + return _data.Slice(0, length); + } + + return _data; + } + + /// + /// Gets a span of the requested size, and a hash of its data. + /// + /// Length of the span + /// Hash of the span data + /// Span of data + public ReadOnlySpan GetSpanAndHash(int length, out uint hash) + { + ReadOnlySpan data = GetSpan(length); + hash = data.Length == length ? CalcHashCached(data) : 0; + return data; + } + + /// + /// Calculates the hash for a requested span. + /// This will try to use a cached hash if the data was already accessed before, to avoid re-hashing. + /// + /// Data to be hashed + /// Hash of the data + private uint CalcHashCached(ReadOnlySpan data) + { + HashState state = default; + bool found = false; + + for (int i = _cachedHashes.Count - 1; i >= 0; i--) + { + int cachedHashSize = _cachedHashes.Keys[i]; + + if (cachedHashSize < data.Length) + { + state = _cachedHashes.Values[i]; + found = true; + break; + } + } + + if (!found) + { + state = new HashState(); + state.Initialize(); + } + + state.Continue(data); + _cachedHashes[data.Length & ~7] = state; + return state.Finalize(data); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs new file mode 100644 index 000000000..b85423cb3 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs @@ -0,0 +1,36 @@ +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Holds counts for the resources used by a shader. + /// + class ResourceCounts + { + /// + /// Total of uniform buffers used by the shaders. + /// + public int UniformBuffersCount; + + /// + /// Total of storage buffers used by the shaders. + /// + public int StorageBuffersCount; + + /// + /// Total of textures used by the shaders. 
+ /// + public int TexturesCount; + + /// + /// Total of images used by the shaders. + /// + public int ImagesCount; + + /// + /// Creates a new instance of the shader resource counts class. + /// + public ResourceCounts() + { + UniformBuffersCount = 1; // The first binding is reserved for the support buffer. + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs index 39bf10fab..651dfd263 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderAddresses.cs @@ -1,4 +1,6 @@ using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Shader { @@ -9,7 +11,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { #pragma warning disable CS0649 public ulong VertexA; - public ulong Vertex; + public ulong VertexB; public ulong TessControl; public ulong TessEvaluation; public ulong Geometry; @@ -34,7 +36,7 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool Equals(ShaderAddresses other) { return VertexA == other.VertexA && - Vertex == other.Vertex && + VertexB == other.VertexB && TessControl == other.TessControl && TessEvaluation == other.TessEvaluation && Geometry == other.Geometry && @@ -47,7 +49,16 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Hash code public override int GetHashCode() { - return HashCode.Combine(VertexA, Vertex, TessControl, TessEvaluation, Geometry, Fragment); + return HashCode.Combine(VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment); + } + + /// + /// Gets a view of the structure as a span of addresses. 
+ /// + /// Span of addresses + public Span AsSpan() + { + return MemoryMarshal.CreateSpan(ref VertexA, Unsafe.SizeOf() / sizeof(ulong)); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index f38709001..03d5ecade 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -1,18 +1,14 @@ -using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader.Cache; -using Ryujinx.Graphics.Gpu.Shader.Cache.Definition; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; using Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; -using System.Diagnostics; -using System.Runtime.InteropServices; using System.Threading; -using System.Threading.Tasks; namespace Ryujinx.Graphics.Gpu.Shader { @@ -21,30 +17,66 @@ namespace Ryujinx.Graphics.Gpu.Shader /// class ShaderCache : IDisposable { - private const TranslationFlags DefaultFlags = TranslationFlags.DebugMode; + /// + /// Default flags used on the shader translation process. 
+ /// + public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode; + + private struct TranslatedShader + { + public readonly CachedShaderStage Shader; + public readonly ShaderProgram Program; + + public TranslatedShader(CachedShaderStage shader, ShaderProgram program) + { + Shader = shader; + Program = program; + } + } + + private struct TranslatedShaderVertexPair + { + public readonly CachedShaderStage VertexA; + public readonly CachedShaderStage VertexB; + public readonly ShaderProgram Program; + + public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program) + { + VertexA = vertexA; + VertexB = vertexB; + Program = program; + } + } private readonly GpuContext _context; private readonly ShaderDumper _dumper; - private readonly Dictionary> _cpPrograms; - private readonly Dictionary> _gpPrograms; + private readonly Dictionary _cpPrograms; + private readonly Dictionary _gpPrograms; - private CacheManager _cacheManager; + private struct ProgramToSave + { + public readonly CachedShaderProgram CachedProgram; + public readonly IProgram HostProgram; - private Dictionary _gpProgramsDiskCache; - private Dictionary _cpProgramsDiskCache; + public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram) + { + CachedProgram = cachedProgram; + HostProgram = hostProgram; + } + } - private Queue<(IProgram, Action)> _programsToSaveQueue; + private Queue _programsToSaveQueue; + + private readonly ComputeShaderCacheHashTable _computeShaderCache; + private readonly ShaderCacheHashTable _graphicsShaderCache; + private readonly DiskCacheHostStorage _diskCacheHostStorage; + private readonly BackgroundDiskCacheWriter _cacheWriter; /// - /// Version of the codegen (to be changed when codegen or guest format change). + /// Event for signalling shader cache loading progress. 
/// - private const ulong ShaderCodeGenVersion = 3251; - - // Progress reporting helpers - private volatile int _shaderCount; - private volatile int _totalShaderCount; public event Action ShaderCacheStateChanged; /// @@ -57,12 +89,23 @@ namespace Ryujinx.Graphics.Gpu.Shader _dumper = new ShaderDumper(); - _cpPrograms = new Dictionary>(); - _gpPrograms = new Dictionary>(); - _gpProgramsDiskCache = new Dictionary(); - _cpProgramsDiskCache = new Dictionary(); + _cpPrograms = new Dictionary(); + _gpPrograms = new Dictionary(); - _programsToSaveQueue = new Queue<(IProgram, Action)>(); + _programsToSaveQueue = new Queue(); + + string diskCacheTitleId = GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null + ? CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId) + : null; + + _computeShaderCache = new ComputeShaderCacheHashTable(); + _graphicsShaderCache = new ShaderCacheHashTable(); + _diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId); + + if (_diskCacheHostStorage.CacheEnabled) + { + _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage); + } } /// @@ -72,13 +115,17 @@ namespace Ryujinx.Graphics.Gpu.Shader { // Check to see if the binaries for previously compiled shaders are ready, and save them out. - while (_programsToSaveQueue.Count > 0) + while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave)) { - (IProgram program, Action dataAction) = _programsToSaveQueue.Peek(); + ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false); - if (program.CheckProgramLink(false) != ProgramLinkStatus.Incomplete) + if (result != ProgramLinkStatus.Incomplete) { - dataAction(program.GetBinary()); + if (result == ProgramLinkStatus.Success) + { + _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary()); + } + _programsToSaveQueue.Dequeue(); } else @@ -91,463 +138,48 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Initialize the cache. 
/// - internal void Initialize() + /// Cancellation token to cancel the shader cache initialization process + internal void Initialize(CancellationToken cancellationToken) { - if (GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null) + if (_diskCacheHostStorage.CacheEnabled) { - _cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion); - - bool isReadOnly = _cacheManager.IsReadOnly; - - HashSet invalidEntries = null; - - if (isReadOnly) + if (!_diskCacheHostStorage.CacheExists()) { - Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)"); - } - else - { - invalidEntries = new HashSet(); + // If we don't have a shader cache on the new format, try to perform migration from the old shader cache. + Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache..."); + + int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage); + + Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders."); } - ReadOnlySpan guestProgramList = _cacheManager.GetGuestProgramList(); + ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader( + _context, + _graphicsShaderCache, + _computeShaderCache, + _diskCacheHostStorage, + cancellationToken, + ShaderCacheStateUpdate); - using AutoResetEvent progressReportEvent = new AutoResetEvent(false); + loader.LoadShaders(); - _shaderCount = 0; - _totalShaderCount = guestProgramList.Length; - - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Start, _shaderCount, _totalShaderCount); - Thread progressReportThread = null; - - if (guestProgramList.Length > 0) + int errorCount = loader.ErrorCount; + if (errorCount != 0) { - progressReportThread = new Thread(ReportProgress) - { - Name = "ShaderCache.ProgressReporter", - Priority = ThreadPriority.Lowest, - IsBackground = true - }; - - 
progressReportThread.Start(progressReportEvent); + Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache."); } - - // Make sure these are initialized before doing compilation. - Capabilities caps = _context.Capabilities; - - int maxTaskCount = Math.Min(Environment.ProcessorCount, 8); - int programIndex = 0; - List activeTasks = new List(); - - using AutoResetEvent taskDoneEvent = new AutoResetEvent(false); - - // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. - // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. - - while (programIndex < guestProgramList.Length || activeTasks.Count > 0) - { - if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length) - { - // Begin a new shader compilation. - Hash128 key = guestProgramList[programIndex]; - - byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); - bool hasHostCache = hostProgramBinary != null; - - IProgram hostProgram = null; - - // If the program sources aren't in the cache, compile from saved guest program. - byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - - if (guestProgram == null) - { - Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); - - // Should not happen, but if someone messed with the cache it's better to catch it. 
- invalidEntries?.Add(key); - - _shaderCount = ++programIndex; - - continue; - } - - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - - if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) - { - Debug.Assert(cachedShaderEntries.Length == 1); - - GuestShaderCacheEntry entry = cachedShaderEntries[0]; - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. - if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary, false, new ShaderInfo(-1)); - } - - ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); - activeTasks.Add(task); - - task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => - { - ShaderProgram program = null; - ShaderProgramInfo shaderProgramInfo = null; - - if (isHostProgramValid) - { - // Reconstruct code holder. - - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); - - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, code); - - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); - - return true; - } - else - { - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. 
- - Task compileTask = Task.Run(() => - { - var binaryCode = new Memory(entry.Code); - - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors, - null); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute); - program = Translator.CreateContext(0, gpuAccessor, options).Translate(out shaderProgramInfo); - }); - - task.OnTask(compileTask, (bool _, ShaderCompileTask task) => - { - if (task.IsFaulted) - { - Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding..."); - - _cacheManager.RemoveProgram(ref key); - return true; // Exit early, the decoding step failed. - } - - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, code); - - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - // Compile shader and create program as the shader program binary got invalidated. - shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, program.Code); - hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, new ShaderInfo(-1)); - - task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => - { - // As the host program was invalidated, save the new entry in the cache. 
- hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); - - return true; - }); - - return false; // Not finished: still need to compile the host program. - }); - - return false; // Not finished: translating the program. - } - }); - } - else - { - Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); - - ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; - List shaderPrograms = new List(); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - TranslationCounts counts = new TranslationCounts(); - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. 
- if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - - bool hasFragmentShader = false; - int fragmentOutputMap = -1; - int fragmentIndex = (int)ShaderStage.Fragment - 1; - - if (hostShaderEntries[fragmentIndex] != null && hostShaderEntries[fragmentIndex].Header.InUse) - { - hasFragmentShader = true; - fragmentOutputMap = hostShaderEntries[fragmentIndex].Header.FragmentOutputMap; - } - - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary, hasFragmentShader, new ShaderInfo(fragmentOutputMap)); - } - - ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); - activeTasks.Add(task); - - GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); - - task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => - { - Task compileTask = Task.Run(() => - { - TranslatorContext[] shaderContexts = null; - - if (!isHostProgramValid) - { - shaderContexts = new TranslatorContext[1 + entries.Length]; - - for (int i = 0; i < entries.Length; i++) - { - GuestShaderCacheEntry entry = entries[i]; - - if (entry == null) - { - continue; - } - - var binaryCode = new Memory(entry.Code); - - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors, - tfd); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags); - - shaderContexts[i + 1] = Translator.CreateContext(0, gpuAccessor, options, counts); - - if (entry.Header.SizeA != 0) - { - var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.VertexA); - - shaderContexts[0] = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts); - } - } - } - - // Reconstruct code holder. 
- for (int i = 0; i < entries.Length; i++) - { - GuestShaderCacheEntry entry = entries[i]; - - if (entry == null) - { - continue; - } - - ShaderProgram program; - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - int stageIndex = i + 1; - - TranslatorContext currentStage = shaderContexts[stageIndex]; - TranslatorContext nextStage = GetNextStageContext(shaderContexts, stageIndex); - TranslatorContext vertexA = stageIndex == 1 ? shaderContexts[0] : null; - - program = currentStage.Translate(out shaderProgramInfo, nextStage, vertexA); - } - - // NOTE: Vertex B comes first in the shader cache. - byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - byte[] code2 = entry.Header.SizeA != 0 ? entry.Code.AsSpan(entry.Header.Size, entry.Header.SizeA).ToArray() : null; - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); - - shaderPrograms.Add(program); - } - }); - - task.OnTask(compileTask, (bool _, ShaderCompileTask task) => - { - if (task.IsFaulted) - { - Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding..."); - - _cacheManager.RemoveProgram(ref key); - return true; // Exit early, the decoding step failed. - } - - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (!isHostProgramValid) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - List hostShaders = new List(); - - // Compile shaders and create program as the shader program binary got invalidated. 
- for (int stage = 0; stage < Constants.ShaderStages; stage++) - { - ShaderProgram program = shaders[stage]?.Program; - - if (program == null) - { - continue; - } - - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; - - hostShaders.Add(hostShader); - } - - int fragmentIndex = (int)ShaderStage.Fragment - 1; - int fragmentOutputMap = -1; - - if (shaders[fragmentIndex] != null) - { - fragmentOutputMap = shaders[fragmentIndex].Info.FragmentOutputMap; - } - - hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), new ShaderInfo(fragmentOutputMap)); - - task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => - { - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); - - return true; - }); - - return false; // Not finished: still need to compile the host program. - } - else - { - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); - - return true; - } - }); - - return false; // Not finished: translating the program. - }); - } - - _shaderCount = ++programIndex; - } - - // Process the queue. - for (int i = 0; i < activeTasks.Count; i++) - { - ShaderCompileTask task = activeTasks[i]; - - if (task.IsDone()) - { - activeTasks.RemoveAt(i--); - } - } - - if (activeTasks.Count == maxTaskCount) - { - // Wait for a task to be done, or for 1ms. - // Host shader compilation cannot signal when it is done, - // so the 1ms timeout is required to poll status. 
- - taskDoneEvent.WaitOne(1); - } - } - - if (!isReadOnly) - { - // Remove entries that are broken in the cache - _cacheManager.RemoveManifestEntries(invalidEntries); - _cacheManager.FlushToArchive(); - _cacheManager.Synchronize(); - } - - progressReportEvent.Set(); - progressReportThread?.Join(); - - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Loaded, _shaderCount, _totalShaderCount); - - Logger.Info?.Print(LogClass.Gpu, $"Shader cache loaded {_shaderCount} entries."); } } /// - /// Raises ShaderCacheStateChanged events periodically. + /// Shader cache state update handler. /// - private void ReportProgress(object state) + /// Current state of the shader cache load process + /// Number of the current shader being processed + /// Total number of shaders to process + private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total) { - const int refreshRate = 50; // ms - - AutoResetEvent endEvent = (AutoResetEvent)state; - - int count = 0; - - do - { - int newCount = _shaderCount; - - if (count != newCount) - { - ShaderCacheStateChanged?.Invoke(ShaderCacheState.Loading, newCount, _totalShaderCount); - count = newCount; - } - } - while (!endEvent.WaitOne(refreshRate)); + ShaderCacheStateChanged?.Invoke(state, current, total); } /// @@ -557,112 +189,42 @@ namespace Ryujinx.Graphics.Gpu.Shader /// This automatically translates, compiles and adds the code to the cache if not present. 
/// /// GPU channel - /// GPU accessor state + /// Texture pool state + /// Compute engine state /// GPU virtual address of the binary shader code - /// Local group size X of the computer shader - /// Local group size Y of the computer shader - /// Local group size Z of the computer shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader /// Compiled compute shader code - public ShaderBundle GetComputeShader( + public CachedShaderProgram GetComputeShader( GpuChannel channel, - GpuAccessorState gas, - ulong gpuVa, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) + GpuChannelPoolState poolState, + GpuChannelComputeState computeState, + ulong gpuVa) { - bool isCached = _cpPrograms.TryGetValue(gpuVa, out List list); - - if (isCached) + if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa)) { - foreach (ShaderBundle cachedCpShader in list) - { - if (IsShaderEqual(channel.MemoryManager, cachedCpShader, gpuVa)) - { - return cachedCpShader; - } - } + return cpShader; } - TranslatorContext[] shaderContexts = new TranslatorContext[1]; - - shaderContexts[0] = DecodeComputeShader( - channel, - gas, - gpuVa, - localSizeX, - localSizeY, - localSizeZ, - localMemorySize, - sharedMemorySize); - - bool isShaderCacheEnabled = _cacheManager != null; - bool isShaderCacheReadOnly = false; - - Hash128 programCodeHash = default; - GuestShaderCacheEntry[] shaderCacheEntries = null; - - // Current shader cache doesn't support bindless textures - if (shaderContexts[0].UsedFeatures.HasFlag(FeatureFlags.Bindless)) + if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode)) { - isShaderCacheEnabled = false; + _cpPrograms[gpuVa] = cpShader; + return cpShader; } - if (isShaderCacheEnabled) - { - isShaderCacheReadOnly = _cacheManager.IsReadOnly; + ShaderSpecializationState specState = new 
ShaderSpecializationState(computeState); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState); + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState); - // Compute hash and prepare data for shader disk cache comparison. - shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts); - programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries); - } + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa); - ShaderBundle cpShader; + TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); - // Search for the program hash in loaded shaders. - if (!isShaderCacheEnabled || !_cpProgramsDiskCache.TryGetValue(programCodeHash, out cpShader)) - { - if (isShaderCacheEnabled) - { - Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!"); - } + IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1)); - // The shader isn't currently cached, translate it and compile it. 
- ShaderCodeHolder shader = TranslateShader(_dumper, channel.MemoryManager, shaderContexts[0], null, null); + cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); - shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); - - IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, new ShaderInfo(-1)); - - cpShader = new ShaderBundle(hostProgram, shader); - - if (isShaderCacheEnabled) - { - _cpProgramsDiskCache.Add(programCodeHash, cpShader); - - if (!isShaderCacheReadOnly) - { - byte[] guestProgramDump = CacheHelper.CreateGuestProgramDump(shaderCacheEntries); - _programsToSaveQueue.Enqueue((hostProgram, (byte[] hostProgramBinary) => - { - _cacheManager.SaveProgram(ref programCodeHash, guestProgramDump, HostShaderCacheEntry.Create(hostProgramBinary, new ShaderCodeHolder[] { shader })); - })); - } - } - } - - if (!isCached) - { - list = new List(); - - _cpPrograms.Add(gpuVa, list); - } - - list.Add(cpShader); + _computeShaderCache.Add(cpShader); + EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram)); + _cpPrograms[gpuVa] = cpShader; return cpShader; } @@ -676,143 +238,141 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// GPU state /// GPU channel - /// GPU accessor state + /// Texture pool state + /// 3D engine state /// Addresses of the shaders for each stage /// Compiled graphics shader code - public ShaderBundle GetGraphicsShader(ref ThreedClassState state, GpuChannel channel, GpuAccessorState gas, ShaderAddresses addresses) + public CachedShaderProgram GetGraphicsShader( + ref ThreedClassState state, + GpuChannel channel, + GpuChannelPoolState poolState, + GpuChannelGraphicsState graphicsState, + ShaderAddresses addresses) { - bool isCached = _gpPrograms.TryGetValue(addresses, out List list); - - if (isCached) + if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, poolState, gpShaders, addresses)) { - foreach 
(ShaderBundle cachedGpShaders in list) + return gpShaders; + } + + if (_graphicsShaderCache.TryFind(channel, poolState, addresses, out gpShaders, out var cachedGuestCode)) + { + _gpPrograms[addresses] = gpShaders; + return gpShaders; + } + + TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state); + + ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors); + GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors); + + ReadOnlySpan addressesSpan = addresses.AsSpan(); + + TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; + TranslatorContext nextStage = null; + + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) + { + ulong gpuVa = addressesSpan[stageIndex + 1]; + + if (gpuVa != 0) { - if (IsShaderEqual(channel.MemoryManager, cachedGpShaders, addresses)) + GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa); + + if (nextStage != null) { - return cachedGpShaders; + currentStage.SetNextStage(nextStage); + } + + if (stageIndex == 0 && addresses.VertexA != 0) + { + translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA); + } + + translatorContexts[stageIndex + 1] = currentStage; + nextStage = currentStage; + } + } + + CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; + List shaderSources = new List(); + + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + TranslatorContext currentStage = translatorContexts[stageIndex + 1]; + + if (currentStage != null) + { + ShaderProgram program; + + if (stageIndex == 0 && translatorContexts[0] != null) + { + TranslatedShaderVertexPair 
translatedShader = TranslateShader( + _dumper, + channel, + currentStage, + translatorContexts[0], + cachedGuestCode.VertexACode, + cachedGuestCode.VertexBCode); + + shaders[0] = translatedShader.VertexA; + shaders[1] = translatedShader.VertexB; + program = translatedShader.Program; + } + else + { + byte[] code = cachedGuestCode.GetByIndex(stageIndex); + + TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code); + + shaders[stageIndex + 1] = translatedShader.Shader; + program = translatedShader.Program; + } + + if (program != null) + { + shaderSources.Add(CreateShaderSource(program)); } } } - TranslatorContext[] shaderContexts = new TranslatorContext[Constants.ShaderStages + 1]; + int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1; + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap)); - TransformFeedbackDescriptor[] tfd = GetTransformFeedbackDescriptors(ref state); + gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); - gas.TransformFeedbackDescriptors = tfd; - - TranslationCounts counts = new TranslationCounts(); - - if (addresses.VertexA != 0) - { - shaderContexts[0] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags | TranslationFlags.VertexA, ShaderStage.Vertex, addresses.VertexA); - } - - shaderContexts[1] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Vertex, addresses.Vertex); - shaderContexts[2] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.TessellationControl, addresses.TessControl); - shaderContexts[3] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.TessellationEvaluation, addresses.TessEvaluation); - shaderContexts[4] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Geometry, addresses.Geometry); - shaderContexts[5] = DecodeGraphicsShader(channel, gas, counts, DefaultFlags, ShaderStage.Fragment, addresses.Fragment); - - bool 
isShaderCacheEnabled = _cacheManager != null; - bool isShaderCacheReadOnly = false; - - Hash128 programCodeHash = default; - GuestShaderCacheEntry[] shaderCacheEntries = null; - - // Current shader cache doesn't support bindless textures - for (int i = 0; i < shaderContexts.Length; i++) - { - if (shaderContexts[i] != null && shaderContexts[i].UsedFeatures.HasFlag(FeatureFlags.Bindless)) - { - isShaderCacheEnabled = false; - break; - } - } - - if (isShaderCacheEnabled) - { - isShaderCacheReadOnly = _cacheManager.IsReadOnly; - - // Compute hash and prepare data for shader disk cache comparison. - shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts); - programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd); - } - - ShaderBundle gpShaders; - - // Search for the program hash in loaded shaders. - if (!isShaderCacheEnabled || !_gpProgramsDiskCache.TryGetValue(programCodeHash, out gpShaders)) - { - if (isShaderCacheEnabled) - { - Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!"); - } - - // The shader isn't currently cached, translate it and compile it. 
- ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages]; - - for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) - { - shaders[stageIndex] = TranslateShader(_dumper, channel.MemoryManager, shaderContexts, stageIndex + 1); - } - - List hostShaders = new List(); - - for (int stage = 0; stage < Constants.ShaderStages; stage++) - { - ShaderProgram program = shaders[stage]?.Program; - - if (program == null) - { - continue; - } - - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; - - hostShaders.Add(hostShader); - } - - int fragmentIndex = (int)ShaderStage.Fragment - 1; - int fragmentOutputMap = -1; - - if (shaders[fragmentIndex] != null) - { - fragmentOutputMap = shaders[fragmentIndex].Info.FragmentOutputMap; - } - - IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), new ShaderInfo(fragmentOutputMap)); - - gpShaders = new ShaderBundle(hostProgram, shaders); - - if (isShaderCacheEnabled) - { - _gpProgramsDiskCache.Add(programCodeHash, gpShaders); - - if (!isShaderCacheReadOnly) - { - byte[] guestProgramDump = CacheHelper.CreateGuestProgramDump(shaderCacheEntries, tfd); - _programsToSaveQueue.Enqueue((hostProgram, (byte[] hostProgramBinary) => - { - _cacheManager.SaveProgram(ref programCodeHash, guestProgramDump, HostShaderCacheEntry.Create(hostProgramBinary, shaders)); - })); - } - } - } - - if (!isCached) - { - list = new List(); - - _gpPrograms.Add(addresses, list); - } - - list.Add(gpShaders); + _graphicsShaderCache.Add(gpShaders); + EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram)); + _gpPrograms[addresses] = gpShaders; return gpShaders; } + /// + /// Creates a shader source for use with the backend from a translated shader program. 
+ /// + /// Translated shader program + /// Shader source + public static ShaderSource CreateShaderSource(ShaderProgram program) + { + return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language); + } + + /// + /// Puts a program on the queue of programs to be saved on the disk cache. + /// + /// + /// This will not do anything if disk shader cache is disabled. + /// + /// Program to be saved on disk + private void EnqueueProgramToSave(ProgramToSave programToSave) + { + if (_diskCacheHostStorage.CacheEnabled) + { + _programsToSaveQueue.Enqueue(programToSave); + } + } + /// /// Gets transform feedback state from the current GPU state. /// @@ -821,7 +381,6 @@ namespace Ryujinx.Graphics.Gpu.Shader private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state) { bool tfEnable = state.TfEnable; - if (!tfEnable) { return null; @@ -833,11 +392,11 @@ namespace Ryujinx.Graphics.Gpu.Shader { var tf = state.TfState[i]; - int length = (int)Math.Min((uint)tf.VaryingsCount, 0x80); - - var varyingLocations = MemoryMarshal.Cast(state.TfVaryingLocations[i].ToSpan()).Slice(0, length); - - descs[i] = new TransformFeedbackDescriptor(tf.BufferIndex, tf.Stride, varyingLocations.ToArray()); + descs[i] = new TransformFeedbackDescriptor( + tf.BufferIndex, + tf.Stride, + tf.VaryingsCount, + ref state.TfVaryingLocations[i]); } return descs; @@ -846,46 +405,54 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Checks if compute shader code in memory is equal to the cached shader. 
/// - /// Memory manager used to access the GPU memory where the shader is located + /// GPU channel using the shader + /// GPU channel state to verify shader compatibility /// Cached compute shader /// GPU virtual address of the shader code in memory /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderBundle cpShader, ulong gpuVa) + private static bool IsShaderEqual( + GpuChannel channel, + GpuChannelPoolState poolState, + CachedShaderProgram cpShader, + ulong gpuVa) { - return IsShaderEqual(memoryManager, cpShader.Shaders[0], gpuVa); + if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa)) + { + return cpShader.SpecializationState.MatchesCompute(channel, poolState); + } + + return false; } /// /// Checks if graphics shader code from all stages in memory are equal to the cached shaders. /// - /// Memory manager used to access the GPU memory where the shader is located + /// GPU channel using the shader + /// GPU channel state to verify shader compatibility /// Cached graphics shaders /// GPU virtual addresses of all enabled shader stages /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderBundle gpShaders, ShaderAddresses addresses) + private static bool IsShaderEqual( + GpuChannel channel, + GpuChannelPoolState poolState, + CachedShaderProgram gpShaders, + ShaderAddresses addresses) { - for (int stage = 0; stage < gpShaders.Shaders.Length; stage++) + ReadOnlySpan addressesSpan = addresses.AsSpan(); + + for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++) { - ShaderCodeHolder shader = gpShaders.Shaders[stage]; + CachedShaderStage shader = gpShaders.Shaders[stageIndex]; - ulong gpuVa = 0; + ulong gpuVa = addressesSpan[stageIndex]; - switch (stage) - { - case 0: gpuVa = addresses.Vertex; break; - case 1: gpuVa = addresses.TessControl; break; - case 2: gpuVa = addresses.TessEvaluation; 
break; - case 3: gpuVa = addresses.Geometry; break; - case 4: gpuVa = addresses.Fragment; break; - } - - if (!IsShaderEqual(memoryManager, shader, gpuVa, addresses.VertexA)) + if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa)) { return false; } } - return true; + return gpShaders.SpecializationState.MatchesGraphics(channel, poolState); } /// @@ -894,9 +461,8 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Memory manager used to access the GPU memory where the shader is located /// Cached shader to compare with /// GPU virtual address of the binary shader code - /// Optional GPU virtual address of the "Vertex A" binary shader code /// True if the code is different, false otherwise - private static bool IsShaderEqual(MemoryManager memoryManager, ShaderCodeHolder shader, ulong gpuVa, ulong gpuVaA = 0) + private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa) { if (shader == null) { @@ -905,47 +471,17 @@ namespace Ryujinx.Graphics.Gpu.Shader ReadOnlySpan memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length); - bool equals = memoryCode.SequenceEqual(shader.Code); - - if (equals && shader.Code2 != null) - { - memoryCode = memoryManager.GetSpan(gpuVaA, shader.Code2.Length); - - equals = memoryCode.SequenceEqual(shader.Code2); - } - - return equals; + return memoryCode.SequenceEqual(shader.Code); } /// /// Decode the binary Maxwell shader code to a translator context. 
/// - /// GPU channel - /// GPU accessor state + /// GPU state accessor /// GPU virtual address of the binary shader code - /// Local group size X of the computer shader - /// Local group size Y of the computer shader - /// Local group size Z of the computer shader - /// Local memory size of the compute shader - /// Shared memory size of the compute shader /// The generated translator context - private TranslatorContext DecodeComputeShader( - GpuChannel channel, - GpuAccessorState gas, - ulong gpuVa, - int localSizeX, - int localSizeY, - int localSizeZ, - int localMemorySize, - int sharedMemorySize) + public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa) { - if (gpuVa == 0) - { - return null; - } - - GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gas, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize); - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute); return Translator.CreateContext(gpuVa, gpuAccessor, options); } @@ -956,126 +492,105 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader. 
/// - /// GPU channel - /// GPU accessor state - /// Cumulative shader resource counts + /// GPU state accessor /// Flags that controls shader translation - /// Shader stage /// GPU virtual address of the shader code /// The generated translator context - private TranslatorContext DecodeGraphicsShader( - GpuChannel channel, - GpuAccessorState gas, - TranslationCounts counts, - TranslationFlags flags, - ShaderStage stage, - ulong gpuVa) + public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa) { - if (gpuVa == 0) - { - return null; - } - - GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gas, (int)stage - 1); - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); - return Translator.CreateContext(gpuVa, gpuAccessor, options, counts); + return Translator.CreateContext(gpuVa, gpuAccessor, options); } /// /// Translates a previously generated translator context to something that the host API accepts. /// /// Optional shader code dumper - /// Memory manager used to access the GPU memory where the shader is located - /// Translator context of all available shader stages - /// Index on the stages array to translate - /// Compiled graphics shader code - private static ShaderCodeHolder TranslateShader( - ShaderDumper dumper, - MemoryManager memoryManager, - TranslatorContext[] stages, - int stageIndex) - { - TranslatorContext currentStage = stages[stageIndex]; - TranslatorContext nextStage = GetNextStageContext(stages, stageIndex); - TranslatorContext vertexA = stageIndex == 1 ? stages[0] : null; - - return TranslateShader(dumper, memoryManager, currentStage, nextStage, vertexA); - } - - /// - /// Gets the next shader stage context, from an array of contexts and index of the current stage. 
- /// - /// Translator context of all available shader stages - /// Index on the stages array to translate - /// The translator context of the next stage, or null if inexistent - private static TranslatorContext GetNextStageContext(TranslatorContext[] stages, int stageIndex) - { - for (int nextStageIndex = stageIndex + 1; nextStageIndex < stages.Length; nextStageIndex++) - { - if (stages[nextStageIndex] != null) - { - return stages[nextStageIndex]; - } - } - - return null; - } - - /// - /// Translates a previously generated translator context to something that the host API accepts. - /// - /// Optional shader code dumper - /// Memory manager used to access the GPU memory where the shader is located + /// GPU channel using the shader /// Translator context of the stage to be translated - /// Translator context of the next active stage, if existent /// Optional translator context of the shader that should be combined + /// Optional Maxwell binary code of the Vertex A shader, if present + /// Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache /// Compiled graphics shader code - private static ShaderCodeHolder TranslateShader( + private static TranslatedShaderVertexPair TranslateShader( ShaderDumper dumper, - MemoryManager memoryManager, + GpuChannel channel, TranslatorContext currentStage, - TranslatorContext nextStage, - TranslatorContext vertexA) + TranslatorContext vertexA, + byte[] codeA, + byte[] codeB) { - if (currentStage == null) + ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1); + + var memoryManager = channel.MemoryManager; + + codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); + codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); + byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray(); + byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray(); + 
+ ShaderDumpPaths pathsA = default; + ShaderDumpPaths pathsB = default; + + if (dumper != null) { - return null; + pathsA = dumper.Dump(codeA, compute: false); + pathsB = dumper.Dump(codeB, compute: false); } - if (vertexA != null) + ShaderProgram program = currentStage.Translate(vertexA); + + pathsB.Prepend(program); + pathsA.Prepend(program); + + CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA); + CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB); + + return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program); + } + + /// + /// Translates a previously generated translator context to something that the host API accepts. + /// + /// Optional shader code dumper + /// GPU channel using the shader + /// Translator context of the stage to be translated + /// Optional Maxwell binary code of the current stage shader, if present on cache + /// Compiled graphics shader code + private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code) + { + var memoryManager = channel.MemoryManager; + + ulong cb1DataAddress = context.Stage == ShaderStage.Compute + ? channel.BufferManager.GetComputeUniformBufferAddress(1) + : channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1); + + byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray(); + code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray(); + + ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default; + ShaderProgram program = context.Translate(); + + paths.Prepend(program); + + return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program); + } + + /// + /// Gets the index of a stage from a . 
+ /// + /// Stage to get the index from + /// Stage index + private static int StageToStageIndex(ShaderStage stage) + { + return stage switch { - byte[] codeA = memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); - byte[] codeB = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); - - ShaderDumpPaths pathsA = default; - ShaderDumpPaths pathsB = default; - - if (dumper != null) - { - pathsA = dumper.Dump(codeA, compute: false); - pathsB = dumper.Dump(codeB, compute: false); - } - - ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage, vertexA); - - pathsB.Prepend(program); - pathsA.Prepend(program); - - return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA); - } - else - { - byte[] code = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); - - ShaderDumpPaths paths = dumper?.Dump(code, currentStage.Stage == ShaderStage.Compute) ?? default; - - ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage); - - paths.Prepend(program); - - return new ShaderCodeHolder(program, shaderProgramInfo, code); - } + ShaderStage.TessellationControl => 1, + ShaderStage.TessellationEvaluation => 2, + ShaderStage.Geometry => 3, + ShaderStage.Fragment => 4, + _ => 0 + }; } /// @@ -1084,23 +599,17 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public void Dispose() { - foreach (List list in _cpPrograms.Values) + foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms()) { - foreach (ShaderBundle bundle in list) - { - bundle.Dispose(); - } + program.Dispose(); } - foreach (List list in _gpPrograms.Values) + foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms()) { - foreach (ShaderBundle bundle in list) - { - bundle.Dispose(); - } + program.Dispose(); } - _cacheManager?.Dispose(); + _cacheWriter?.Dispose(); } } } diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs 
b/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs new file mode 100644 index 000000000..065f9ba90 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCacheHashTable.cs @@ -0,0 +1,280 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Holds already cached code for a guest shader. + /// + struct CachedGraphicsGuestCode + { + public byte[] VertexACode; + public byte[] VertexBCode; + public byte[] TessControlCode; + public byte[] TessEvaluationCode; + public byte[] GeometryCode; + public byte[] FragmentCode; + + /// + /// Gets the guest code of a shader stage by its index. + /// + /// Index of the shader stage + /// Guest code, or null if not present + public byte[] GetByIndex(int stageIndex) + { + return stageIndex switch + { + 1 => TessControlCode, + 2 => TessEvaluationCode, + 3 => GeometryCode, + 4 => FragmentCode, + _ => VertexBCode + }; + } + } + + /// + /// Graphics shader cache hash table. + /// + class ShaderCacheHashTable + { + /// + /// Shader ID cache. + /// + private struct IdCache + { + private PartitionedHashTable _cache; + private int _id; + + /// + /// Initializes the state. + /// + public void Initialize() + { + _cache = new PartitionedHashTable(); + _id = 0; + } + + /// + /// Adds guest code to the cache. + /// + /// + /// If the code was already cached, it will just return the existing ID. + /// + /// Code to add + /// Unique ID for the guest code + public int Add(byte[] code) + { + int id = ++_id; + int cachedId = _cache.GetOrAdd(code, id); + if (cachedId != id) + { + --_id; + } + + return cachedId; + } + + /// + /// Tries to find cached guest code. 
+ /// + /// Code accessor used to read guest code to find a match on the hash table + /// ID of the guest code, if found + /// Cached guest code, if found + /// True if found, false otherwise + public bool TryFind(IDataAccessor dataAccessor, out int id, out byte[] data) + { + return _cache.TryFindItem(dataAccessor, out id, out data); + } + } + + /// + /// Guest code IDs of the guest shaders that when combined forms a single host program. + /// + private struct IdTable : IEquatable + { + public int VertexAId; + public int VertexBId; + public int TessControlId; + public int TessEvaluationId; + public int GeometryId; + public int FragmentId; + + public override bool Equals(object obj) + { + return obj is IdTable other && Equals(other); + } + + public bool Equals(IdTable other) + { + return other.VertexAId == VertexAId && + other.VertexBId == VertexBId && + other.TessControlId == TessControlId && + other.TessEvaluationId == TessEvaluationId && + other.GeometryId == GeometryId && + other.FragmentId == FragmentId; + } + + public override int GetHashCode() + { + return HashCode.Combine(VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId); + } + } + + private IdCache _vertexACache; + private IdCache _vertexBCache; + private IdCache _tessControlCache; + private IdCache _tessEvaluationCache; + private IdCache _geometryCache; + private IdCache _fragmentCache; + + private readonly Dictionary _shaderPrograms; + + /// + /// Creates a new graphics shader cache hash table. + /// + public ShaderCacheHashTable() + { + _vertexACache.Initialize(); + _vertexBCache.Initialize(); + _tessControlCache.Initialize(); + _tessEvaluationCache.Initialize(); + _geometryCache.Initialize(); + _fragmentCache.Initialize(); + + _shaderPrograms = new Dictionary(); + } + + /// + /// Adds a program to the cache. 
+ /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + IdTable idTable = new IdTable(); + + foreach (var shader in program.Shaders) + { + if (shader == null) + { + continue; + } + + if (shader.Info != null) + { + switch (shader.Info.Stage) + { + case ShaderStage.Vertex: + idTable.VertexBId = _vertexBCache.Add(shader.Code); + break; + case ShaderStage.TessellationControl: + idTable.TessControlId = _tessControlCache.Add(shader.Code); + break; + case ShaderStage.TessellationEvaluation: + idTable.TessEvaluationId = _tessEvaluationCache.Add(shader.Code); + break; + case ShaderStage.Geometry: + idTable.GeometryId = _geometryCache.Add(shader.Code); + break; + case ShaderStage.Fragment: + idTable.FragmentId = _fragmentCache.Add(shader.Code); + break; + } + } + else + { + idTable.VertexAId = _vertexACache.Add(shader.Code); + } + } + + if (!_shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList)) + { + specList = new ShaderSpecializationList(); + _shaderPrograms.Add(idTable, specList); + } + + specList.Add(program); + } + + /// + /// Tries to find a cached program. + /// + /// + /// Even if false is returned, might still contain cached guest code. + /// This can be used to avoid additional allocations for guest code that was already cached. 
+ /// + /// GPU channel + /// Texture pool state + /// Guest addresses of the shaders to find + /// Cached host program for the given state, if found + /// Cached guest code, if any found + /// True if a cached host program was found, false otherwise + public bool TryFind( + GpuChannel channel, + GpuChannelPoolState poolState, + ShaderAddresses addresses, + out CachedShaderProgram program, + out CachedGraphicsGuestCode guestCode) + { + var memoryManager = channel.MemoryManager; + IdTable idTable = new IdTable(); + guestCode = new CachedGraphicsGuestCode(); + + program = null; + + bool found = TryGetId(_vertexACache, memoryManager, addresses.VertexA, out idTable.VertexAId, out guestCode.VertexACode); + found &= TryGetId(_vertexBCache, memoryManager, addresses.VertexB, out idTable.VertexBId, out guestCode.VertexBCode); + found &= TryGetId(_tessControlCache, memoryManager, addresses.TessControl, out idTable.TessControlId, out guestCode.TessControlCode); + found &= TryGetId(_tessEvaluationCache, memoryManager, addresses.TessEvaluation, out idTable.TessEvaluationId, out guestCode.TessEvaluationCode); + found &= TryGetId(_geometryCache, memoryManager, addresses.Geometry, out idTable.GeometryId, out guestCode.GeometryCode); + found &= TryGetId(_fragmentCache, memoryManager, addresses.Fragment, out idTable.FragmentId, out guestCode.FragmentCode); + + if (found && _shaderPrograms.TryGetValue(idTable, out ShaderSpecializationList specList)) + { + return specList.TryFindForGraphics(channel, poolState, out program); + } + + return false; + } + + /// + /// Tries to get the ID of a single cached shader stage. 
+ /// + /// ID cache of the stage + /// GPU memory manager + /// Base address of the shader + /// ID, if found + /// Cached guest code, if found + /// True if a cached shader is found, false otherwise + private static bool TryGetId(IdCache idCache, MemoryManager memoryManager, ulong baseAddress, out int id, out byte[] data) + { + if (baseAddress == 0) + { + id = 0; + data = null; + return true; + } + + ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(memoryManager, baseAddress); + return idCache.TryFind(codeAccessor, out id, out data); + } + + /// + /// Gets all programs that have been added to the table. + /// + /// Programs added to the table + public IEnumerable GetPrograms() + { + foreach (var specList in _shaderPrograms.Values) + { + foreach (var program in specList) + { + yield return program; + } + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs new file mode 100644 index 000000000..dbb33d224 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeAccessor.cs @@ -0,0 +1,32 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader.HashTable; +using System; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Shader code accessor. + /// + struct ShaderCodeAccessor : IDataAccessor + { + private readonly MemoryManager _memoryManager; + private readonly ulong _baseAddress; + + /// + /// Creates a new shader code accessor. 
+ /// + /// Memory manager used to access the shader code + /// Base address of the shader in memory + public ShaderCodeAccessor(MemoryManager memoryManager, ulong baseAddress) + { + _memoryManager = memoryManager; + _baseAddress = baseAddress; + } + + /// + public ReadOnlySpan GetSpan(int offset, int length) + { + return _memoryManager.GetSpanMapped(_baseAddress + (ulong)offset, length); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs deleted file mode 100644 index dbf2d6f59..000000000 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs +++ /dev/null @@ -1,52 +0,0 @@ -using Ryujinx.Graphics.GAL; -using Ryujinx.Graphics.Shader; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - /// - /// Cached shader code for a single shader stage. - /// - class ShaderCodeHolder - { - /// - /// Shader program containing translated code. - /// - public ShaderProgram Program { get; } - - /// - /// Shader program information. - /// - public ShaderProgramInfo Info { get; } - - /// - /// Host shader object. - /// - /// Null if the host shader program cache is in use. - public IShader HostShader { get; set; } - - /// - /// Maxwell binary shader code. - /// - public byte[] Code { get; } - - /// - /// Optional maxwell binary shader code for "Vertex A" shader. - /// - public byte[] Code2 { get; } - - /// - /// Creates a new instace of the shader code holder. 
- /// - /// Shader program - /// Shader program information - /// Maxwell binary shader code - /// Optional binary shader code of the "Vertex A" shader, when combined with "Vertex B" - public ShaderCodeHolder(ShaderProgram program, ShaderProgramInfo info, byte[] code, byte[] code2 = null) - { - Program = program; - Info = info; - Code = code; - Code2 = code2; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs deleted file mode 100644 index a9283de23..000000000 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs +++ /dev/null @@ -1,95 +0,0 @@ -using Ryujinx.Graphics.GAL; -using System.Threading; -using System.Threading.Tasks; - -namespace Ryujinx.Graphics.Gpu.Shader -{ - delegate bool ShaderCompileTaskCallback(bool success, ShaderCompileTask task); - - /// - /// A class that represents a shader compilation. - /// - class ShaderCompileTask - { - private bool _compiling; - - private Task _programsTask; - private IProgram _program; - - private ShaderCompileTaskCallback _action; - private AutoResetEvent _taskDoneEvent; - - public bool IsFaulted => _programsTask.IsFaulted; - - /// - /// Create a new shader compile task, with an event to signal whenever a subtask completes. - /// - /// Event to signal when a subtask completes - public ShaderCompileTask(AutoResetEvent taskDoneEvent) - { - _taskDoneEvent = taskDoneEvent; - } - - /// - /// Check the completion status of the shader compile task, and run callbacks on step completion. - /// Calling this periodically is required to progress through steps of the compilation. - /// - /// True if the task is complete, false if it is in progress - public bool IsDone() - { - if (_compiling) - { - ProgramLinkStatus status = _program.CheckProgramLink(false); - - if (status != ProgramLinkStatus.Incomplete) - { - return _action(status == ProgramLinkStatus.Success, this); - } - } - else - { - // Waiting on the task. 
- - if (_programsTask.IsCompleted) - { - return _action(true, this); - } - } - - return false; - } - - /// - /// Run a callback when the specified task has completed. - /// - /// The task object that needs to complete - /// The action to perform when it is complete - public void OnTask(Task task, ShaderCompileTaskCallback action) - { - _compiling = false; - - _programsTask = task; - _action = action; - - task.ContinueWith(task => _taskDoneEvent.Set()); - } - - /// - /// Run a callback when the specified program has been linked. - /// - /// The program that needs to be linked - /// The action to perform when linking is complete - public void OnCompiled(IProgram program, ShaderCompileTaskCallback action) - { - _compiling = true; - - _program = program; - _action = action; - - if (program == null) - { - action(false, this); - } - } - } -} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs new file mode 100644 index 000000000..87e087544 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs @@ -0,0 +1,76 @@ +using System.Collections; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// List of cached shader programs that differs only by specialization state. + /// + class ShaderSpecializationList : IEnumerable + { + private readonly List _entries = new List(); + + /// + /// Adds a program to the list. + /// + /// Program to be added + public void Add(CachedShaderProgram program) + { + _entries.Add(program); + } + + /// + /// Tries to find an existing 3D program on the cache. 
+ /// + /// GPU channel + /// Texture pool state + /// Cached program, if found + /// True if a compatible program is found, false otherwise + public bool TryFindForGraphics(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program) + { + foreach (var entry in _entries) + { + if (entry.SpecializationState.MatchesGraphics(channel, poolState)) + { + program = entry; + return true; + } + } + + program = default; + return false; + } + + /// + /// Tries to find an existing compute program on the cache. + /// + /// GPU channel + /// Texture pool state + /// Cached program, if found + /// True if a compatible program is found, false otherwise + public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program) + { + foreach (var entry in _entries) + { + if (entry.SpecializationState.MatchesCompute(channel, poolState)) + { + program = entry; + return true; + } + } + + program = default; + return false; + } + + public IEnumerator GetEnumerator() + { + return _entries.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs new file mode 100644 index 000000000..2bbc3d2c1 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -0,0 +1,615 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Gpu.Shader.DiskCache; +using Ryujinx.Graphics.Shader; +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + class ShaderSpecializationState + { + private const uint ComsMagic = (byte)'C' | ((byte)'O' << 8) | ((byte)'M' << 16) | ((byte)'S' << 24); + private const uint GfxsMagic = (byte)'G' | ((byte)'F' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 
16) | ((byte)'D' << 24); + private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24); + private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + + /// + /// Flags indicating GPU state that is used by the shader. + /// + [Flags] + private enum QueriedStateFlags + { + EarlyZForce = 1 << 0, + PrimitiveTopology = 1 << 1, + TessellationMode = 1 << 2, + TransformFeedback = 1 << 3 + } + + private QueriedStateFlags _queriedState; + private bool _compute; + private byte _constantBufferUsePerStage; + + /// + /// Compute engine state. + /// + public GpuChannelComputeState ComputeState; + + /// + /// 3D engine state. + /// + public GpuChannelGraphicsState GraphicsState; + + /// + /// Contant buffers bound at the time the shader was compiled, per stage. + /// + public Array5 ConstantBufferUse; + + /// + /// Transform feedback buffers active at the time the shader was compiled. + /// + public TransformFeedbackDescriptor[] TransformFeedbackDescriptors; + + /// + /// Flags indicating texture state that is used by the shader. + /// + [Flags] + private enum QueriedTextureStateFlags + { + TextureFormat = 1 << 0, + SamplerType = 1 << 1, + CoordNormalized = 1 << 2 + } + + /// + /// Reference type wrapping a value. + /// + private class Box + { + /// + /// Wrapped value. + /// + public T Value; + } + + /// + /// State of a texture or image that is accessed by the shader. + /// + private struct TextureSpecializationState + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Flags indicating which state of the texture the shader depends on. + /// + public QueriedTextureStateFlags QueriedFlags; + + /// + /// Encoded texture format value. + /// + public uint Format; + + /// + /// True if the texture format is sRGB, false otherwise. + /// + public bool FormatSrgb; + + /// + /// Texture target. 
+ /// + public Image.TextureTarget TextureTarget; + + /// + /// Indicates if the coordinates used to sample the texture are normalized or not (0.0..1.0 or 0..Width/Height). + /// + public bool CoordNormalized; + } + + /// + /// Texture binding information, used to identify each texture accessed by the shader. + /// + private struct TextureKey : IEquatable + { + // New fields should be added to the end of the struct to keep disk shader cache compatibility. + + /// + /// Shader stage where the texture is used. + /// + public readonly int StageIndex; + + /// + /// Texture handle offset in words on the texture buffer. + /// + public readonly int Handle; + + /// + /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register). + /// + public readonly int CbufSlot; + + /// + /// Creates a new texture key. + /// + /// Shader stage where the texture is used + /// Texture handle offset in words on the texture buffer + /// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register) + public TextureKey(int stageIndex, int handle, int cbufSlot) + { + StageIndex = stageIndex; + Handle = handle; + CbufSlot = cbufSlot; + } + + public override bool Equals(object obj) + { + return obj is TextureKey textureKey && Equals(textureKey); + } + + public bool Equals(TextureKey other) + { + return StageIndex == other.StageIndex && Handle == other.Handle && CbufSlot == other.CbufSlot; + } + + public override int GetHashCode() + { + return HashCode.Combine(StageIndex, Handle, CbufSlot); + } + } + + private readonly Dictionary> _textureSpecialization; + + /// + /// Creates a new instance of the shader specialization state. + /// + private ShaderSpecializationState() + { + _textureSpecialization = new Dictionary>(); + } + + /// + /// Creates a new instance of the shader specialization state. 
+ /// + /// Current compute engine state + public ShaderSpecializationState(GpuChannelComputeState state) : this() + { + ComputeState = state; + _compute = true; + } + + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState(GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() + { + GraphicsState = state; + _compute = false; + + if (descriptors != null) + { + TransformFeedbackDescriptors = descriptors; + _queriedState |= QueriedStateFlags.TransformFeedback; + } + } + + /// + /// Indicates that the shader accesses the early Z force state. + /// + public void RecordEarlyZForce() + { + _queriedState |= QueriedStateFlags.EarlyZForce; + } + + /// + /// Indicates that the shader accesses the primitive topology state. + /// + public void RecordPrimitiveTopology() + { + _queriedState |= QueriedStateFlags.PrimitiveTopology; + } + + /// + /// Indicates that the shader accesses the tessellation mode state. + /// + public void RecordTessellationMode() + { + _queriedState |= QueriedStateFlags.TessellationMode; + } + + /// + /// Indicates that the shader accesses the constant buffer use state. + /// + /// Shader stage index + /// Mask indicating the constant buffers bound at the time of the shader compilation + public void RecordConstantBufferUse(int stageIndex, uint useMask) + { + ConstantBufferUse[stageIndex] = useMask; + _constantBufferUsePerStage |= (byte)(1 << stageIndex); + } + + /// + /// Indicates that a given texture is accessed by the shader. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Descriptor of the texture + public void RegisterTexture(int stageIndex, int handle, int cbufSlot, Image.TextureDescriptor descriptor) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.Format = descriptor.UnpackFormat(); + state.Value.FormatSrgb = descriptor.UnpackSrgb(); + state.Value.TextureTarget = descriptor.UnpackTextureTarget(); + state.Value.CoordNormalized = descriptor.UnpackTextureCoordNormalized(); + } + + /// + /// Indicates that a given texture is accessed by the shader. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + /// Maxwell texture format value + /// Whenever the texture format is a sRGB format + /// Texture target type + /// Whenever the texture coordinates used on the shader are considered normalized + public void RegisterTexture( + int stageIndex, + int handle, + int cbufSlot, + uint format, + bool formatSrgb, + Image.TextureTarget target, + bool coordNormalized) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.Format = format; + state.Value.FormatSrgb = formatSrgb; + state.Value.TextureTarget = target; + state.Value.CoordNormalized = coordNormalized; + } + + /// + /// Indicates that the format of a given texture was used during the shader translation process. 
+ /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureFormat(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.TextureFormat; + } + + /// + /// Indicates that the target of a given texture was used during the shader translation process. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureSamplerType(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.SamplerType; + } + + /// + /// Indicates that the coordinate normalization state of a given texture was used during the shader translation process. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public void RecordTextureCoordNormalized(int stageIndex, int handle, int cbufSlot) + { + Box state = GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot); + state.Value.QueriedFlags |= QueriedTextureStateFlags.CoordNormalized; + } + + /// + /// Checks if a given texture was registerd on this specialization state. + /// + /// Shader stage where the texture is used + /// Offset in words of the texture handle on the texture buffer + /// Slot of the texture buffer constant buffer + public bool TextureRegistered(int stageIndex, int handle, int cbufSlot) + { + return GetTextureSpecState(stageIndex, handle, cbufSlot) != null; + } + + /// + /// Gets the recorded format of a given texture. 
+        /// </summary>
+        /// <param name="stageIndex">Shader stage where the texture is used</param>
+        /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+        /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+        public (uint, bool) GetFormat(int stageIndex, int handle, int cbufSlot)
+        {
+            TextureSpecializationState state = GetTextureSpecState(stageIndex, handle, cbufSlot).Value;
+            return (state.Format, state.FormatSrgb);
+        }
+
+        /// <summary>
+        /// Gets the recorded target of a given texture.
+        /// </summary>
+        /// <param name="stageIndex">Shader stage where the texture is used</param>
+        /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+        /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+        public Image.TextureTarget GetTextureTarget(int stageIndex, int handle, int cbufSlot)
+        {
+            return GetTextureSpecState(stageIndex, handle, cbufSlot).Value.TextureTarget;
+        }
+
+        /// <summary>
+        /// Gets the recorded coordinate normalization state of a given texture.
+        /// </summary>
+        /// <param name="stageIndex">Shader stage where the texture is used</param>
+        /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+        /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+        public bool GetCoordNormalized(int stageIndex, int handle, int cbufSlot)
+        {
+            return GetTextureSpecState(stageIndex, handle, cbufSlot).Value.CoordNormalized;
+        }
+
+        /// <summary>
+        /// Gets texture specialization state for a given texture, or create a new one if not present.
+        /// </summary>
+        /// <param name="stageIndex">Shader stage where the texture is used</param>
+        /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+        /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+        /// <returns>Texture specialization state</returns>
+        private Box<TextureSpecializationState> GetOrCreateTextureSpecState(int stageIndex, int handle, int cbufSlot)
+        {
+            TextureKey key = new TextureKey(stageIndex, handle, cbufSlot);
+
+            if (!_textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> state))
+            {
+                _textureSpecialization.Add(key, state = new Box<TextureSpecializationState>());
+            }
+
+            return state;
+        }
+
+        /// <summary>
+        /// Gets texture specialization state for a given texture.
+        /// </summary>
+        /// <param name="stageIndex">Shader stage where the texture is used</param>
+        /// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
+        /// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
+        /// <returns>Texture specialization state</returns>
+        private Box<TextureSpecializationState> GetTextureSpecState(int stageIndex, int handle, int cbufSlot)
+        {
+            TextureKey key = new TextureKey(stageIndex, handle, cbufSlot);
+
+            if (_textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> state))
+            {
+                return state;
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Checks if the recorded state matches the current GPU 3D engine state.
+        /// </summary>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="poolState">Texture pool state</param>
+        /// <returns>True if the state matches, false otherwise</returns>
+        public bool MatchesGraphics(GpuChannel channel, GpuChannelPoolState poolState)
+        {
+            return Matches(channel, poolState, isCompute: false);
+        }
+
+        /// <summary>
+        /// Checks if the recorded state matches the current GPU compute engine state.
+        /// </summary>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="poolState">Texture pool state</param>
+        /// <returns>True if the state matches, false otherwise</returns>
+        public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState)
+        {
+            return Matches(channel, poolState, isCompute: true);
+        }
+
+        /// <summary>
+        /// Checks if the recorded state matches the current GPU state.
+        /// </summary>
+        /// <param name="channel">GPU channel</param>
+        /// <param name="poolState">Texture pool state</param>
+        /// <param name="isCompute">Indicates whether the check is requested by the 3D or compute engine</param>
+        /// <returns>True if the state matches, false otherwise</returns>
+        private bool Matches(GpuChannel channel, GpuChannelPoolState poolState, bool isCompute)
+        {
+            int constantBufferUsePerStageMask = _constantBufferUsePerStage;
+
+            while (constantBufferUsePerStageMask != 0)
+            {
+                int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+
+                uint useMask = isCompute
+                    ? channel.BufferManager.GetComputeUniformBufferUseMask()
+                    : channel.BufferManager.GetGraphicsUniformBufferUseMask(index);
+
+                if (ConstantBufferUse[index] != useMask)
+                {
+                    return false;
+                }
+
+                constantBufferUsePerStageMask &= ~(1 << index);
+            }
+
+            foreach (var kv in _textureSpecialization)
+            {
+                TextureKey textureKey = kv.Key;
+
+                (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(textureKey.CbufSlot, poolState.TextureBufferIndex);
+
+                ulong textureCbAddress;
+                ulong samplerCbAddress;
+
+                if (isCompute)
+                {
+                    textureCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex);
+                    samplerCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex);
+                }
+                else
+                {
+                    textureCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, textureBufferIndex);
+                    samplerCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, samplerBufferIndex);
+                }
+
+                if (!channel.MemoryManager.Physical.IsMapped(textureCbAddress) || !channel.MemoryManager.Physical.IsMapped(samplerCbAddress))
+                {
+                    continue;
+                }
+
+                Image.TextureDescriptor descriptor;
+
+                if (isCompute)
+                {
+                    descriptor = channel.TextureManager.GetComputeTextureDescriptor(
+                        poolState.TexturePoolGpuVa,
+                        poolState.TextureBufferIndex,
+                        poolState.TexturePoolMaximumId,
+                        textureKey.Handle,
+                        textureKey.CbufSlot);
+                }
+                else
+                {
+                    descriptor = channel.TextureManager.GetGraphicsTextureDescriptor(
+                        poolState.TexturePoolGpuVa,
+                        poolState.TextureBufferIndex,
+                        poolState.TexturePoolMaximumId,
+                        textureKey.StageIndex,
+                        textureKey.Handle,
+                        textureKey.CbufSlot);
+                }
+
+                Box<TextureSpecializationState> specializationState = kv.Value;
+
+                if (specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized) &&
+                    specializationState.Value.CoordNormalized != descriptor.UnpackTextureCoordNormalized())
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Reads shader specialization state that has been serialized.
+        /// </summary>
+        /// <param name="dataReader">Data reader</param>
+        /// <returns>Shader specialization state</returns>
+        public static ShaderSpecializationState Read(ref BinarySerializer dataReader)
+        {
+            ShaderSpecializationState specState = new ShaderSpecializationState();
+
+            dataReader.Read(ref specState._queriedState);
+            dataReader.Read(ref specState._compute);
+
+            if (specState._compute)
+            {
+                dataReader.ReadWithMagicAndSize(ref specState.ComputeState, ComsMagic);
+            }
+            else
+            {
+                dataReader.ReadWithMagicAndSize(ref specState.GraphicsState, GfxsMagic);
+            }
+
+            dataReader.Read(ref specState._constantBufferUsePerStage);
+
+            int constantBufferUsePerStageMask = specState._constantBufferUsePerStage;
+
+            while (constantBufferUsePerStageMask != 0)
+            {
+                int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+                dataReader.Read(ref specState.ConstantBufferUse[index]);
+                constantBufferUsePerStageMask &= ~(1 << index);
+            }
+
+            if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
+            {
+                ushort tfCount = 0;
+                dataReader.Read(ref tfCount);
+                specState.TransformFeedbackDescriptors = new TransformFeedbackDescriptor[tfCount];
+
+                for (int index = 0; index < tfCount; index++)
+                {
+                    dataReader.ReadWithMagicAndSize(ref specState.TransformFeedbackDescriptors[index], TfbdMagic);
+                }
+            }
+
+            ushort count = 0;
+            dataReader.Read(ref count);
+
+            for (int index = 0; index < count; index++)
+            {
+                TextureKey textureKey = default;
+                Box<TextureSpecializationState> textureState = new Box<TextureSpecializationState>();
+
+                dataReader.ReadWithMagicAndSize(ref textureKey, TexkMagic);
+                dataReader.ReadWithMagicAndSize(ref textureState.Value, TexsMagic);
+
+                specState._textureSpecialization[textureKey] = textureState;
+            }
+
+            return specState;
+        }
+
+        /// <summary>
+        /// Serializes the shader specialization state.
+        /// </summary>
+        /// <param name="dataWriter">Data writer</param>
+        public void Write(ref BinarySerializer dataWriter)
+        {
+            dataWriter.Write(ref _queriedState);
+            dataWriter.Write(ref _compute);
+
+            if (_compute)
+            {
+                dataWriter.WriteWithMagicAndSize(ref ComputeState, ComsMagic);
+            }
+            else
+            {
+                dataWriter.WriteWithMagicAndSize(ref GraphicsState, GfxsMagic);
+            }
+
+            dataWriter.Write(ref _constantBufferUsePerStage);
+
+            int constantBufferUsePerStageMask = _constantBufferUsePerStage;
+
+            while (constantBufferUsePerStageMask != 0)
+            {
+                int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
+                dataWriter.Write(ref ConstantBufferUse[index]);
+                constantBufferUsePerStageMask &= ~(1 << index);
+            }
+
+            if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
+            {
+                ushort tfCount = (ushort)TransformFeedbackDescriptors.Length;
+                dataWriter.Write(ref tfCount);
+
+                for (int index = 0; index < TransformFeedbackDescriptors.Length; index++)
+                {
+                    dataWriter.WriteWithMagicAndSize(ref TransformFeedbackDescriptors[index], TfbdMagic);
+                }
+            }
+
+            ushort count = (ushort)_textureSpecialization.Count;
+            dataWriter.Write(ref count);
+
+            foreach (var kv in _textureSpecialization)
+            {
+                var textureKey = kv.Key;
+                var textureState = kv.Value;
+
+                dataWriter.WriteWithMagicAndSize(ref textureKey, TexkMagic);
+                dataWriter.WriteWithMagicAndSize(ref textureState.Value, TexsMagic);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs b/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs
index eaa889cc5..09f1df760 100644
--- a/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/TransformFeedbackDescriptor.cs
@@ -1,19 +1,58 @@
+using Ryujinx.Common.Memory;
 using System;
+using System.Runtime.InteropServices;
 
 namespace Ryujinx.Graphics.Gpu.Shader
 {
+    /// <summary>
+    /// Transform feedback descriptor.
+    /// </summary>
     struct TransformFeedbackDescriptor
     {
-        public int BufferIndex { get; }
-        public int Stride { get; }
+        // New fields should be added to the end of the struct to keep disk shader cache compatibility.
 
-        public byte[] VaryingLocations { get; }
+        /// <summary>
+        /// Index of the transform feedback.
+        /// </summary>
+        public readonly int BufferIndex;
 
-        public TransformFeedbackDescriptor(int bufferIndex, int stride, byte[] varyingLocations)
+        /// <summary>
+        /// Amount of bytes consumed per vertex.
+        /// </summary>
+        public readonly int Stride;
+
+        /// <summary>
+        /// Number of varyings written into the buffer.
+        /// </summary>
+        public readonly int VaryingCount;
+
+        /// <summary>
+        /// Location of varyings to be written into the buffer. Each byte is one location.
+        /// </summary>
+        public Array32<uint> VaryingLocations; // Making this readonly breaks AsSpan
+
+        /// <summary>
+        /// Creates a new transform feedback descriptor.
+        /// </summary>
+        /// <param name="bufferIndex">Index of the transform feedback</param>
+        /// <param name="stride">Amount of bytes consumed per vertex</param>
+        /// <param name="varyingCount">Number of varyings written into the buffer. Indicates size in bytes of <paramref name="varyingLocations"/></param>
+        /// <param name="varyingLocations">Location of varyings to be written into the buffer. Each byte is one location</param>
+        public TransformFeedbackDescriptor(int bufferIndex, int stride, int varyingCount, ref Array32<uint> varyingLocations)
         {
-            BufferIndex = bufferIndex;
-            Stride = stride;
-            VaryingLocations = varyingLocations ?? throw new ArgumentNullException(nameof(varyingLocations));
+            BufferIndex = bufferIndex;
+            Stride = stride;
+            VaryingCount = varyingCount;
+            VaryingLocations = varyingLocations;
+        }
+
+        /// <summary>
+        /// Gets a span of the <see cref="VaryingLocations"/>.
+        /// </summary>
+        /// <returns>Span of varying locations</returns>
+        public ReadOnlySpan<byte> AsSpan()
+        {
+            return MemoryMarshal.Cast<uint, byte>(VaryingLocations.ToSpan()).Slice(0, Math.Min(128, VaryingCount));
         }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/EnumConversion.cs b/Ryujinx.Graphics.OpenGL/EnumConversion.cs
index 22e816056..24cf1fc4d 100644
--- a/Ryujinx.Graphics.OpenGL/EnumConversion.cs
+++ b/Ryujinx.Graphics.OpenGL/EnumConversion.cs
@@ -1,6 +1,7 @@
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
 
 namespace Ryujinx.Graphics.OpenGL
 {
@@ -528,5 +529,19 @@ namespace Ryujinx.Graphics.OpenGL
 
             return All.Never;
         }
+
+        public static ShaderType Convert(this ShaderStage stage)
+        {
+            return stage switch
+            {
+                ShaderStage.Compute => ShaderType.ComputeShader,
+                ShaderStage.Vertex => ShaderType.VertexShader,
+                ShaderStage.TessellationControl => ShaderType.TessControlShader,
+                ShaderStage.TessellationEvaluation => ShaderType.TessEvaluationShader,
+                ShaderStage.Geometry => ShaderType.GeometryShader,
+                ShaderStage.Fragment => ShaderType.FragmentShader,
+                _ => ShaderType.VertexShader
+            };
+        }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index d2b559a8c..0cc722e6c 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -1,6 +1,8 @@
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Buffers.Binary;
 
@@ -24,46 +26,66 @@ namespace Ryujinx.Graphics.OpenGL
         }
 
         private ProgramLinkStatus _status = ProgramLinkStatus.Incomplete;
-        private IShader[] _shaders;
+        private int[] _shaderHandles;
 
         public bool HasFragmentShader;
         public int FragmentOutputMap { get; }
 
-        public Program(IShader[] shaders, int fragmentOutputMap)
+        public Program(ShaderSource[] shaders, int fragmentOutputMap)
         {
             Handle = GL.CreateProgram();
 
             GL.ProgramParameter(Handle, ProgramParameterName.ProgramBinaryRetrievableHint, 1);
 
+            _shaderHandles = new int[shaders.Length];
+
             for (int index = 0; index < shaders.Length; index++)
             {
-                Shader shader = (Shader)shaders[index];
+                ShaderSource shader = shaders[index];
 
-                if (shader.IsFragment)
+                if (shader.Stage == ShaderStage.Fragment)
                 {
                     HasFragmentShader = true;
                 }
 
-                GL.AttachShader(Handle, shader.Handle);
+                int shaderHandle = GL.CreateShader(shader.Stage.Convert());
+
+                switch (shader.Language)
+                {
+                    case TargetLanguage.Glsl:
+                        GL.ShaderSource(shaderHandle, shader.Code);
+                        GL.CompileShader(shaderHandle);
+                        break;
+                    case TargetLanguage.Spirv:
+                        GL.ShaderBinary(1, ref shaderHandle, (BinaryFormat)All.ShaderBinaryFormatSpirVArb, shader.BinaryCode, shader.BinaryCode.Length);
+                        GL.SpecializeShader(shaderHandle, "main", 0, (int[])null, (int[])null);
+                        break;
+                }
+
+                GL.AttachShader(Handle, shaderHandle);
+
+                _shaderHandles[index] = shaderHandle;
             }
 
             GL.LinkProgram(Handle);
 
-            _shaders = shaders;
             FragmentOutputMap = fragmentOutputMap;
         }
 
         public Program(ReadOnlySpan<byte> code, bool hasFragmentShader, int fragmentOutputMap)
         {
-            BinaryFormat binaryFormat = (BinaryFormat)BinaryPrimitives.ReadInt32LittleEndian(code.Slice(code.Length - 4, 4));
-
             Handle = GL.CreateProgram();
 
-            unsafe
+            if (code.Length >= 4)
             {
-                fixed (byte* ptr = code)
+                BinaryFormat binaryFormat = (BinaryFormat)BinaryPrimitives.ReadInt32LittleEndian(code.Slice(code.Length - 4, 4));
+
+                unsafe
                 {
-                    GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4);
+                    fixed (byte* ptr = code)
+                    {
+                        GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4);
+                    }
                 }
             }
 
@@ -89,18 +111,7 @@ namespace Ryujinx.Graphics.OpenGL
             }
 
             GL.GetProgram(Handle, GetProgramParameterName.LinkStatus, out int status);
-
-            if (_shaders != null)
-            {
-                for (int index = 0; index < _shaders.Length; index++)
-                {
-                    int shaderHandle = ((Shader)_shaders[index]).Handle;
-
-                    GL.DetachShader(Handle, shaderHandle);
-                }
-
-                _shaders = null;
-            }
+            DeleteShaders();
 
             if (status == 0)
             {
@@ -129,10 +140,25 @@ namespace Ryujinx.Graphics.OpenGL
             return data;
         }
 
+        private void DeleteShaders()
+        {
+            if (_shaderHandles != null)
+            {
+                foreach (int shaderHandle in _shaderHandles)
+                {
+                    GL.DetachShader(Handle, shaderHandle);
+                    GL.DeleteShader(shaderHandle);
+                }
+
+                _shaderHandles = null;
+            }
+        }
+
         public void Dispose()
         {
             if (Handle != 0)
             {
+                DeleteShaders();
                 GL.DeleteProgram(Handle);
 
                 Handle = 0;
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index 8a6b4689f..2a9ab4223 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -1,11 +1,10 @@
-using OpenTK.Graphics;
-using OpenTK.Graphics.OpenGL;
+using OpenTK.Graphics.OpenGL;
 using Ryujinx.Common.Configuration;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.OpenGL.Image;
 using Ryujinx.Graphics.OpenGL.Queries;
-using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 
 namespace Ryujinx.Graphics.OpenGL
@@ -54,11 +53,6 @@ namespace Ryujinx.Graphics.OpenGL
             ResourcePool = new ResourcePool();
         }
 
-        public IShader CompileShader(ShaderStage stage, string code)
-        {
-            return new Shader(stage, code);
-        }
-
         public BufferHandle CreateBuffer(int size)
         {
             BufferCount++;
@@ -66,7 +60,7 @@ namespace Ryujinx.Graphics.OpenGL
             return Buffer.Create(size);
         }
 
-        public IProgram CreateProgram(IShader[] shaders, ShaderInfo info)
+        public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
         {
             return new Program(shaders, info.FragmentOutputMap);
         }
@@ -101,6 +95,8 @@ namespace Ryujinx.Graphics.OpenGL
         public Capabilities GetCapabilities()
         {
             return new Capabilities(
+                api: TargetApi.OpenGL,
+                vendorName: GpuVendor,
                 hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows,
                 hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows,
                 supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
diff --git a/Ryujinx.Graphics.OpenGL/Shader.cs b/Ryujinx.Graphics.OpenGL/Shader.cs
deleted file mode 100644
index 8374fa626..000000000
--- a/Ryujinx.Graphics.OpenGL/Shader.cs
+++ /dev/null
@@ -1,42 +0,0 @@
-using OpenTK.Graphics.OpenGL;
-using Ryujinx.Graphics.GAL;
-using Ryujinx.Graphics.Shader;
-
-namespace Ryujinx.Graphics.OpenGL
-{
-    class Shader : IShader
-    {
-        public int Handle { get; private set; }
-        public bool IsFragment { get; }
-
-        public Shader(ShaderStage stage, string code)
-        {
-            ShaderType type = stage switch
-            {
-                ShaderStage.Compute => ShaderType.ComputeShader,
-                ShaderStage.Vertex => ShaderType.VertexShader,
-                ShaderStage.TessellationControl => ShaderType.TessControlShader,
-                ShaderStage.TessellationEvaluation => ShaderType.TessEvaluationShader,
-                ShaderStage.Geometry => ShaderType.GeometryShader,
-                ShaderStage.Fragment => ShaderType.FragmentShader,
-                _ => ShaderType.VertexShader
-            };
-
-            Handle = GL.CreateShader(type);
-            IsFragment = stage == ShaderStage.Fragment;
-
-            GL.ShaderSource(Handle, code);
-            GL.CompileShader(Handle);
-        }
-
-        public void Dispose()
-        {
-            if (Handle != 0)
-            {
-                GL.DeleteShader(Handle);
-
-                Handle = 0;
-            }
-        }
-    }
-}
diff --git a/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
index a3af6e41f..4ce8a896d 100644
--- a/Ryujinx.Graphics.Shader/BufferDescriptor.cs
+++ b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
@@ -2,6 +2,8 @@ namespace Ryujinx.Graphics.Shader
 {
     public struct BufferDescriptor
     {
+        // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
         public readonly int Binding;
         public readonly int Slot;
         public BufferUsageFlags Flags;
diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
index 6fa4055aa..60ad540cb 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -373,7 +373,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
                     for (int i = 0; i < cbOffsetsCount; i++)
                     {
-                        uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
+                        uint targetOffset = config.ConstantBuffer1Read(cbBaseOffset + i * 4);
                         Block target = getBlock(baseOffset + targetOffset);
                         target.Predecessors.Add(block);
                         block.Successors.Add(target);
diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index b2512868e..9c624d90d 100644
--- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -2,153 +2,341 @@
 
 namespace Ryujinx.Graphics.Shader
 {
+    /// <summary>
+    /// GPU state access interface.
+    /// </summary>
     public interface IGpuAccessor
     {
+        /// <summary>
+        /// Prints a log message.
+        /// </summary>
+        /// <param name="message">Message to print</param>
         void Log(string message)
         {
             // No default log output.
         }
 
+        /// <summary>
+        /// Reads data from the constant buffer 1.
+        /// </summary>
+        /// <param name="offset">Offset in bytes to read from</param>
+        /// <returns>Value at the given offset</returns>
         uint ConstantBuffer1Read(int offset)
         {
             return 0;
         }
 
+        /// <summary>
+        /// Gets a span of the specified memory location, containing shader code.
+        /// </summary>
+        /// <param name="address">GPU virtual address of the data</param>
+        /// <param name="minimumSize">Minimum size that the returned span may have</param>
+        /// <returns>Span of the memory location</returns>
         ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize);
 
+        /// <summary>
+        /// Queries the binding number of a constant buffer.
+        /// </summary>
+        /// <param name="index">Constant buffer index</param>
+        /// <returns>Binding number</returns>
+        int QueryBindingConstantBuffer(int index)
+        {
+            return index;
+        }
+
+        /// <summary>
+        /// Queries the binding number of a storage buffer.
+        /// </summary>
+        /// <param name="index">Storage buffer index</param>
+        /// <returns>Binding number</returns>
+        int QueryBindingStorageBuffer(int index)
+        {
+            return index;
+        }
+
+        /// <summary>
+        /// Queries the binding number of a texture.
+        /// </summary>
+        /// <param name="index">Texture index</param>
+        /// <returns>Binding number</returns>
+        int QueryBindingTexture(int index)
+        {
+            return index;
+        }
+
+        /// <summary>
+        /// Queries the binding number of an image.
+        /// </summary>
+        /// <param name="index">Image index</param>
+        /// <returns>Binding number</returns>
+        int QueryBindingImage(int index)
+        {
+            return index;
+        }
+
+        /// <summary>
+        /// Queries Local Size X for compute shaders.
+        /// </summary>
+        /// <returns>Local Size X</returns>
         int QueryComputeLocalSizeX()
         {
             return 1;
         }
 
+        /// <summary>
+        /// Queries Local Size Y for compute shaders.
+        /// </summary>
+        /// <returns>Local Size Y</returns>
         int QueryComputeLocalSizeY()
         {
             return 1;
         }
 
+        /// <summary>
+        /// Queries Local Size Z for compute shaders.
+        /// </summary>
+        /// <returns>Local Size Z</returns>
         int QueryComputeLocalSizeZ()
         {
             return 1;
         }
 
+        /// <summary>
+        /// Queries Local Memory size in bytes for compute shaders.
+        /// </summary>
+        /// <returns>Local Memory size in bytes</returns>
         int QueryComputeLocalMemorySize()
         {
             return 0x1000;
         }
 
+        /// <summary>
+        /// Queries Shared Memory size in bytes for compute shaders.
+        /// </summary>
+        /// <returns>Shared Memory size in bytes</returns>
         int QueryComputeSharedMemorySize()
         {
             return 0xc000;
         }
 
+        /// <summary>
+        /// Queries Constant Buffer usage information.
+        /// </summary>
+        /// <returns>A mask where each bit set indicates a bound constant buffer</returns>
         uint QueryConstantBufferUse()
         {
             return 0;
         }
 
+        /// <summary>
+        /// Queries host about the presence of the FrontFacing built-in variable bug.
+        /// </summary>
+        /// <returns>True if the bug is present on the host device used, false otherwise</returns>
         bool QueryHostHasFrontFacingBug()
         {
             return false;
         }
 
+        /// <summary>
+        /// Queries host about the presence of the vector indexing bug.
+        /// </summary>
+        /// <returns>True if the bug is present on the host device used, false otherwise</returns>
         bool QueryHostHasVectorIndexingBug()
         {
             return false;
         }
 
+        /// <summary>
+        /// Queries host storage buffer alignment required.
+        /// </summary>
+        /// <returns>Host storage buffer alignment in bytes</returns>
         int QueryHostStorageBufferOffsetAlignment()
         {
             return 16;
         }
 
+        /// <summary>
+        /// Queries host support for texture formats with BGRA component order (such as BGRA8).
+        /// </summary>
+        /// <returns>True if BGRA formats are supported, false otherwise</returns>
         bool QueryHostSupportsBgraFormat()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries host support for fragment shader ordering critical sections on the shader code.
+        /// </summary>
+        /// <returns>True if fragment shader interlock is supported, false otherwise</returns>
         bool QueryHostSupportsFragmentShaderInterlock()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries host support for fragment shader ordering scoped critical sections on the shader code.
+        /// </summary>
+        /// <returns>True if fragment shader ordering is supported, false otherwise</returns>
         bool QueryHostSupportsFragmentShaderOrderingIntel()
         {
             return false;
         }
 
+        /// <summary>
+        /// Queries host support for readable images without an explicit format declaration on the shader.
+        /// </summary>
+        /// <returns>True if formatted image load is supported, false otherwise</returns>
         bool QueryHostSupportsImageLoadFormatted()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU non-constant texture offset support.
+        /// </summary>
+        /// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
         bool QueryHostSupportsNonConstantTextureOffset()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU shader ballot support.
+        /// </summary>
+        /// <returns>True if the GPU and driver supports shader ballot, false otherwise</returns>
         bool QueryHostSupportsShaderBallot()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU texture shadow LOD support.
+        /// </summary>
+        /// <returns>True if the GPU and driver supports texture shadow LOD, false otherwise</returns>
         bool QueryHostSupportsTextureShadowLod()
         {
             return true;
         }
 
+        /// <summary>
+        /// Queries sampler type information.
+        /// </summary>
+        /// <param name="handle">Texture handle</param>
+        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+        /// <returns>The sampler type value for the given handle</returns>
         SamplerType QuerySamplerType(int handle, int cbufSlot = -1)
         {
             return SamplerType.Texture2D;
         }
 
-        bool QueryIsTextureRectangle(int handle, int cbufSlot = -1)
+        /// <summary>
+        /// Queries texture coordinate normalization information.
+        /// </summary>
+        /// <param name="handle">Texture handle</param>
+        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+        /// <returns>True if the coordinates are normalized, false otherwise</returns>
+        bool QueryTextureCoordNormalized(int handle, int cbufSlot = -1)
         {
-            return false;
+            return true;
         }
 
+        /// <summary>
+        /// Queries current primitive topology for geometry shaders.
+        /// </summary>
+        /// <returns>Current primitive topology</returns>
         InputTopology QueryPrimitiveTopology()
         {
             return InputTopology.Points;
         }
 
+        /// <summary>
+        /// Queries the tessellation evaluation shader primitive winding order.
+        /// </summary>
+        /// <returns>True if the primitive winding order is clockwise, false if counter-clockwise</returns>
         bool QueryTessCw()
         {
             return false;
         }
 
+        /// <summary>
+        /// Queries the tessellation evaluation shader abstract patch type.
+        /// </summary>
+        /// <returns>Abstract patch type</returns>
         TessPatchType QueryTessPatchType()
         {
             return TessPatchType.Triangles;
         }
 
+        /// <summary>
+        /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch.
+        /// </summary>
+        /// <returns>Spacing between tessellated vertices of the patch</returns>
         TessSpacing QueryTessSpacing()
         {
             return TessSpacing.EqualSpacing;
         }
 
+        /// <summary>
+        /// Queries texture format information, for shaders using image load or store.
+        /// </summary>
+        /// <remarks>
+        /// This only returns non-compressed color formats.
+        /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
+        /// </remarks>
+        /// <param name="handle">Texture handle</param>
+        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+        /// <returns>Color format of the non-compressed texture</returns>
         TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1)
         {
             return TextureFormat.R8G8B8A8Unorm;
         }
 
+        /// <summary>
+        /// Queries transform feedback enable state.
+        /// </summary>
+        /// <returns>True if the shader uses transform feedback, false otherwise</returns>
         bool QueryTransformFeedbackEnabled()
         {
             return false;
         }
 
+        /// <summary>
+        /// Queries the varying locations that should be written to the transform feedback buffer.
+        /// </summary>
+        /// <param name="bufferIndex">Index of the transform feedback buffer</param>
+        /// <returns>Varying locations for the specified buffer</returns>
         ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex)
         {
             return ReadOnlySpan<byte>.Empty;
         }
 
+        /// <summary>
+        /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer.
+        /// </summary>
+        /// <param name="bufferIndex">Index of the transform feedback buffer</param>
+        /// <returns>Stride for the specified buffer</returns>
         int QueryTransformFeedbackStride(int bufferIndex)
         {
             return 0;
         }
 
+        /// <summary>
+        /// Queries if host state forces early depth testing.
+        /// </summary>
+        /// <returns>True if early depth testing is forced</returns>
         bool QueryEarlyZForce()
         {
             return false;
         }
+
+        /// <summary>
+        /// Registers a texture used by the shader.
+        /// </summary>
+        /// <param name="handle">Texture handle word offset</param>
+        /// <param name="cbufSlot">Constant buffer slot where the texture handle is located</param>
+        void RegisterTexture(int handle, int cbufSlot)
+        {
+            // Only useful when recording information for a disk shader cache.
+        }
     }
 }
diff --git a/Ryujinx.Graphics.Shader/ShaderProgram.cs b/Ryujinx.Graphics.Shader/ShaderProgram.cs
index dd87b67d4..29fff21e6 100644
--- a/Ryujinx.Graphics.Shader/ShaderProgram.cs
+++ b/Ryujinx.Graphics.Shader/ShaderProgram.cs
@@ -1,25 +1,28 @@
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 
 namespace Ryujinx.Graphics.Shader
 {
     public class ShaderProgram
     {
-        public ShaderStage Stage { get; }
+        public ShaderProgramInfo Info { get; }
+        public TargetLanguage Language { get; }
 
         public string Code { get; private set; }
         public byte[] BinaryCode { get; }
 
-        private ShaderProgram(ShaderStage stage)
+        private ShaderProgram(ShaderProgramInfo info, TargetLanguage language)
         {
-            Stage = stage;
+            Info = info;
+            Language = language;
         }
 
-        public ShaderProgram(ShaderStage stage, string code) : this(stage)
+        public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, string code) : this(info, language)
         {
             Code = code;
         }
 
-        public ShaderProgram(ShaderStage stage, byte[] binaryCode) : this(stage)
+        public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, byte[] binaryCode) : this(info, language)
         {
             BinaryCode = binaryCode;
         }
diff --git a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
index d1c1b9457..659f6167e 100644
--- a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
+++ b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
@@ -10,6 +10,7 @@ namespace Ryujinx.Graphics.Shader
         public ReadOnlyCollection<TextureDescriptor> Textures { get; }
         public ReadOnlyCollection<TextureDescriptor> Images { get; }
 
+        public ShaderStage Stage { get; }
         public bool UsesInstanceId { get; }
         public bool UsesRtLayer { get; }
         public byte ClipDistancesWritten { get; }
@@ -20,6 +21,7 @@ namespace Ryujinx.Graphics.Shader
             BufferDescriptor[] sBuffers,
             TextureDescriptor[] textures,
             TextureDescriptor[] images,
+            ShaderStage stage,
             bool usesInstanceId,
             bool usesRtLayer,
             byte clipDistancesWritten,
@@ -30,6 +32,7 @@ namespace Ryujinx.Graphics.Shader
             Textures = Array.AsReadOnly(textures);
             Images = Array.AsReadOnly(images);
 
+            Stage = stage;
             UsesInstanceId = usesInstanceId;
             UsesRtLayer = usesRtLayer;
             ClipDistancesWritten = clipDistancesWritten;
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 31c71f20f..ce79f3b8e 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -74,7 +74,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
                     for (int j = 0; j < locations.Length; j++)
                     {
                         byte location = locations[j];
-                        if (location < 0x80)
+                        if (location < 0xc0)
                         {
                             context.Info.TransformFeedbackOutputs[location] = new TransformFeedbackOutput(tfbIndex, j * 4, stride);
                         }
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
index 933f265f9..2dc239643 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
@@ -30,7 +30,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
         {
             Functions = new List<StructuredFunction>();
 
-            TransformFeedbackOutputs = new TransformFeedbackOutput[0x80];
+            TransformFeedbackOutputs = new TransformFeedbackOutput[0xc0];
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/TextureDescriptor.cs b/Ryujinx.Graphics.Shader/TextureDescriptor.cs
index b7b0ae12c..85ea9adbe 100644
--- a/Ryujinx.Graphics.Shader/TextureDescriptor.cs
+++ b/Ryujinx.Graphics.Shader/TextureDescriptor.cs
@@ -2,6 +2,8 @@ namespace Ryujinx.Graphics.Shader
 {
     public struct TextureDescriptor
     {
+        // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
         public readonly int Binding;
 
         public readonly SamplerType Type;
diff --git a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
index 910faf1ca..e9b073ab2 100644
--- a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -164,9 +164,9 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
 
-            bool isRect = !isBindless && config.GpuAccessor.QueryIsTextureRectangle(texOp.Handle, texOp.CbufSlot);
+            bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);
 
-            if (!(hasInvalidOffset || isRect))
+            if (!hasInvalidOffset && isCoordNormalized)
             {
                 return node;
             }
@@ -263,7 +263,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             hasInvalidOffset &= !areAllOffsetsConstant;
 
-            if (!(hasInvalidOffset || isRect))
+            if (!hasInvalidOffset && isCoordNormalized)
             {
                 return node;
             }
@@ -300,15 +300,17 @@ namespace Ryujinx.Graphics.Shader.Translation
                 return res;
             }
 
-            // Emulate texture rectangle by normalizing the coordinates on the shader.
-            // When sampler*Rect is used, the coords are expected to the in the [0, W or H] range,
+            // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
+            // Without normalization, the coordinates are expected to be in the [0, W or H] range,
             // and otherwise, it is expected to be in the [0, 1] range.
             // We normalize by dividing the coords by the texture size.
-            if (isRect && !intCoords)
+            if (!isCoordNormalized && !intCoords)
             {
                 config.SetUsedFeature(FeatureFlags.IntegerSampling);
 
-                for (int index = 0; index < coordsCount; index++)
+                int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+                for (int index = 0; index < normCoordsCount; index++)
                 {
                     Operand coordSize = Local();
 
diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 6bb045ec1..23b8b9510 100644
--- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -41,9 +41,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public FeatureFlags UsedFeatures { get; private set; }
 
-        public HashSet<int> TextureHandlesForCache { get; }
-
-        private readonly TranslationCounts _counts;
+        public int Cb1DataSize { get; private set; }
 
         public bool NextUsesFixedFuncAttributes { get; private set; }
         public int UsedInputAttributes { get; private set; }
@@ -109,21 +107,22 @@ namespace Ryujinx.Graphics.Shader.Translation
         private TextureDescriptor[] _cachedTextureDescriptors;
         private TextureDescriptor[] _cachedImageDescriptors;
 
-        public int FirstConstantBufferBinding { get; private set; }
-        public int FirstStorageBufferBinding { get; private set; }
+        private int _firstConstantBufferBinding;
+        private int _firstStorageBufferBinding;
 
-        public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts)
+        public int FirstConstantBufferBinding => _firstConstantBufferBinding;
+        public int FirstStorageBufferBinding => _firstStorageBufferBinding;
+
+        public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
         {
-            Stage                  = ShaderStage.Compute;
-            GpuAccessor            = gpuAccessor;
-            Options                = options;
-            _counts                = counts;
-            TextureHandlesForCache = new HashSet<int>();
-            _usedTextures          = new Dictionary<TextureInfo, TextureMeta>();
-            _usedImages            = new Dictionary<TextureInfo, TextureMeta>();
+            Stage         = ShaderStage.Compute;
+            GpuAccessor   = gpuAccessor;
+            Options       = options;
+            _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
+            _usedImages   = new Dictionary<TextureInfo, TextureMeta>();
         }
 
-        public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts) : this(gpuAccessor, options, counts)
+        public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
         {
             Stage = header.Stage;
             GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;
@@ -144,6 +143,16 @@ namespace Ryujinx.Graphics.Shader.Translation
             return BitOperations.PopCount((uint)OmapTargets) + 1;
         }
 
+        public uint ConstantBuffer1Read(int offset)
+        {
+            if (Cb1DataSize < offset + 4)
+            {
+                Cb1DataSize = offset + 4;
+            }
+
+            return GpuAccessor.ConstantBuffer1Read(offset);
+        }
+
         public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
         {
             // When the formatted load extension is supported, we don't need to
@@ -197,8 +206,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             ClipDistancesWritten |= other.ClipDistancesWritten;
             UsedFeatures |= other.UsedFeatures;
 
-            TextureHandlesForCache.UnionWith(other.TextureHandlesForCache);
-
             UsedInputAttributes |= other.UsedInputAttributes;
             UsedOutputAttributes |= other.UsedOutputAttributes;
             _usedConstantBuffers |= other._usedConstantBuffers;
@@ -391,6 +398,8 @@ namespace Ryujinx.Graphics.Shader.Translation
                 bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureSize;
                 SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent);
             }
+
+            GpuAccessor.RegisterTexture(handle, cbufSlot);
         }
 
         private void SetUsedTextureOrImage(
@@ -485,13 +494,12 @@ namespace Ryujinx.Graphics.Shader.Translation
                 usedMask |=
(int)GpuAccessor.QueryConstantBufferUse(); } - FirstConstantBufferBinding = _counts.UniformBuffersCount; - return _cachedConstantBufferDescriptors = GetBufferDescriptors( usedMask, 0, UsedFeatures.HasFlag(FeatureFlags.CbIndexing), - _counts.IncrementUniformBuffersCount); + out _firstConstantBufferBinding, + GpuAccessor.QueryBindingConstantBuffer); } public BufferDescriptor[] GetStorageBufferDescriptors() @@ -501,21 +509,23 @@ namespace Ryujinx.Graphics.Shader.Translation return _cachedStorageBufferDescriptors; } - FirstStorageBufferBinding = _counts.StorageBuffersCount; - return _cachedStorageBufferDescriptors = GetBufferDescriptors( _usedStorageBuffers, _usedStorageBuffersWrite, true, - _counts.IncrementStorageBuffersCount); + out _firstStorageBufferBinding, + GpuAccessor.QueryBindingStorageBuffer); } private static BufferDescriptor[] GetBufferDescriptors( int usedMask, int writtenMask, bool isArray, - Func getBindingCallback) + out int firstBinding, + Func getBindingCallback) { + firstBinding = 0; + bool hasFirstBinding = false; var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; int lastSlot = -1; @@ -529,13 +539,25 @@ namespace Ryujinx.Graphics.Shader.Translation // The next array entries also consumes bindings, even if they are unused. 
for (int j = lastSlot + 1; j < slot; j++) { - getBindingCallback(); + int binding = getBindingCallback(j); + + if (!hasFirstBinding) + { + firstBinding = binding; + hasFirstBinding = true; + } } } lastSlot = slot; - descriptors[i] = new BufferDescriptor(getBindingCallback(), slot); + descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); + + if (!hasFirstBinding) + { + firstBinding = descriptors[i].Binding; + hasFirstBinding = true; + } if ((writtenMask & (1 << slot)) != 0) { @@ -550,15 +572,15 @@ namespace Ryujinx.Graphics.Shader.Translation public TextureDescriptor[] GetTextureDescriptors() { - return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, _counts.IncrementTexturesCount); + return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture); } public TextureDescriptor[] GetImageDescriptors() { - return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, _counts.IncrementImagesCount); + return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage); } - private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) + private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) { var descriptors = new TextureDescriptor[dict.Count]; @@ -568,7 +590,7 @@ namespace Ryujinx.Graphics.Shader.Translation var info = kv.Key; var meta = kv.Value; - int binding = getBindingCallback(); + int binding = getBindingCallback(i); descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle); descriptors[i].SetFlag(meta.UsageFlags); diff --git a/Ryujinx.Graphics.Shader/Translation/TranslationCounts.cs b/Ryujinx.Graphics.Shader/Translation/TranslationCounts.cs deleted file mode 100644 index 6751d7ea6..000000000 --- a/Ryujinx.Graphics.Shader/Translation/TranslationCounts.cs +++ /dev/null @@ -1,36 +0,0 @@ 
-namespace Ryujinx.Graphics.Shader.Translation -{ - public class TranslationCounts - { - public int UniformBuffersCount { get; private set; } - public int StorageBuffersCount { get; private set; } - public int TexturesCount { get; private set; } - public int ImagesCount { get; private set; } - - public TranslationCounts() - { - // The first binding is reserved for the support buffer. - UniformBuffersCount = 1; - } - - internal int IncrementUniformBuffersCount() - { - return UniformBuffersCount++; - } - - internal int IncrementStorageBuffersCount() - { - return StorageBuffersCount++; - } - - internal int IncrementTexturesCount() - { - return TexturesCount++; - } - - internal int IncrementImagesCount() - { - return ImagesCount++; - } - } -} diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index 5119dfb64..e1614e660 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -25,18 +25,12 @@ namespace Ryujinx.Graphics.Shader.Translation } } - public static TranslatorContext CreateContext( - ulong address, - IGpuAccessor gpuAccessor, - TranslationOptions options, - TranslationCounts counts = null) + public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options) { - counts ??= new TranslationCounts(); - - return DecodeShader(address, gpuAccessor, options, counts); + return DecodeShader(address, gpuAccessor, options); } - internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config, out ShaderProgramInfo shaderProgramInfo) + internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config) { var cfgs = new ControlFlowGraph[functions.Length]; var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length]; @@ -87,31 +81,25 @@ namespace Ryujinx.Graphics.Shader.Translation StructuredProgramInfo sInfo = 
StructuredProgram.MakeStructuredProgram(funcs, config); - ShaderProgram program; - - switch (config.Options.TargetLanguage) - { - case TargetLanguage.Glsl: - program = new ShaderProgram(config.Stage, GlslGenerator.Generate(sInfo, config)); - break; - default: - throw new NotImplementedException(config.Options.TargetLanguage.ToString()); - } - - shaderProgramInfo = new ShaderProgramInfo( + ShaderProgramInfo info = new ShaderProgramInfo( config.GetConstantBufferDescriptors(), config.GetStorageBufferDescriptors(), config.GetTextureDescriptors(), config.GetImageDescriptors(), + config.Stage, config.UsedFeatures.HasFlag(FeatureFlags.InstanceId), config.UsedFeatures.HasFlag(FeatureFlags.RtLayer), config.ClipDistancesWritten, config.OmapTargets); - return program; + return config.Options.TargetLanguage switch + { + TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), + _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()) + }; } - private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts) + private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options) { ShaderConfig config; DecodedProgram program; @@ -119,13 +107,13 @@ namespace Ryujinx.Graphics.Shader.Translation if ((options.Flags & TranslationFlags.Compute) != 0) { - config = new ShaderConfig(gpuAccessor, options, counts); + config = new ShaderConfig(gpuAccessor, options); program = Decoder.Decode(config, address); } else { - config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts); + config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options); program = Decoder.Decode(config, address + HeaderSize); } @@ -138,20 +126,6 @@ namespace Ryujinx.Graphics.Shader.Translation { maxEndAddress = block.EndAddress; } - - if 
(!config.UsedFeatures.HasFlag(FeatureFlags.Bindless)) - { - for (int index = 0; index < block.OpCodes.Count; index++) - { - InstOp op = block.OpCodes[index]; - - if (op.Props.HasFlag(InstProps.Tex)) - { - int tidB = (int)((op.RawOpCode >> 36) & 0x1fff); - config.TextureHandlesForCache.Add(tidB); - } - } - } } } diff --git a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index b4e61cb63..8900f9fe6 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -16,10 +16,7 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderStage Stage => _config.Stage; public int Size => _config.Size; - - public FeatureFlags UsedFeatures => _config.UsedFeatures; - - public HashSet TextureHandlesForCache => _config.TextureHandlesForCache; + public int Cb1DataSize => _config.Cb1DataSize; public IGpuAccessor GpuAccessor => _config.GpuAccessor; @@ -129,16 +126,13 @@ namespace Ryujinx.Graphics.Shader.Translation return output; } - public ShaderProgram Translate( - out ShaderProgramInfo shaderProgramInfo, - TranslatorContext nextStage = null, - TranslatorContext other = null) + public void SetNextStage(TranslatorContext nextStage) { - if (nextStage != null) - { - _config.MergeFromtNextStage(nextStage._config); - } + _config.MergeFromtNextStage(nextStage._config); + } + public ShaderProgram Translate(TranslatorContext other = null) + { FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _); if (other != null) @@ -152,7 +146,7 @@ namespace Ryujinx.Graphics.Shader.Translation _config.InheritFrom(other._config); } - return Translator.Translate(code, _config, out shaderProgramInfo); + return Translator.Translate(code, _config); } } } diff --git a/Ryujinx.Headless.SDL2/WindowBase.cs b/Ryujinx.Headless.SDL2/WindowBase.cs index 3fbd9bc3d..74eb0d31a 100644 --- a/Ryujinx.Headless.SDL2/WindowBase.cs +++ 
b/Ryujinx.Headless.SDL2/WindowBase.cs @@ -43,6 +43,7 @@ namespace Ryujinx.Headless.SDL2 private GraphicsDebugLevel _glLogLevel; private readonly Stopwatch _chrono; private readonly long _ticksPerFrame; + private readonly CancellationTokenSource _gpuCancellationTokenSource; private readonly ManualResetEvent _exitEvent; private long _ticks; @@ -66,6 +67,7 @@ namespace Ryujinx.Headless.SDL2 _glLogLevel = glLogLevel; _chrono = new Stopwatch(); _ticksPerFrame = Stopwatch.Frequency / TargetFps; + _gpuCancellationTokenSource = new CancellationTokenSource(); _exitEvent = new ManualResetEvent(false); _aspectRatio = aspectRatio; _enableMouse = enableMouse; @@ -162,7 +164,7 @@ namespace Ryujinx.Headless.SDL2 Device.Gpu.Renderer.RunLoop(() => { - Device.Gpu.InitializeShaderCache(); + Device.Gpu.InitializeShaderCache(_gpuCancellationTokenSource.Token); Translator.IsReadyForTranslation.Set(); while (_isActive) @@ -223,6 +225,8 @@ namespace Ryujinx.Headless.SDL2 return; } + _gpuCancellationTokenSource.Cancel(); + _isStopped = true; _isActive = false; diff --git a/Ryujinx.ShaderTools/Program.cs b/Ryujinx.ShaderTools/Program.cs index 43b9494e7..746b780c0 100644 --- a/Ryujinx.ShaderTools/Program.cs +++ b/Ryujinx.ShaderTools/Program.cs @@ -55,7 +55,7 @@ namespace Ryujinx.ShaderTools TranslationOptions translationOptions = new TranslationOptions(options.TargetLanguage, options.TargetApi, flags); - ShaderProgram program = Translator.CreateContext(0, new GpuAccessor(data), translationOptions).Translate(out _); + ShaderProgram program = Translator.CreateContext(0, new GpuAccessor(data), translationOptions).Translate(); if (options.OutputPath == null) { diff --git a/Ryujinx/Ui/RendererWidgetBase.cs b/Ryujinx/Ui/RendererWidgetBase.cs index cdbf5d6c5..123403088 100644 --- a/Ryujinx/Ui/RendererWidgetBase.cs +++ b/Ryujinx/Ui/RendererWidgetBase.cs @@ -60,6 +60,8 @@ namespace Ryujinx.Ui private readonly ManualResetEvent _exitEvent; + private readonly CancellationTokenSource 
_gpuCancellationTokenSource; + // Hide Cursor const int CursorHideIdleTime = 8; // seconds private static readonly Cursor _invisibleCursor = new Cursor(Display.Default, CursorType.BlankCursor); @@ -105,6 +107,8 @@ namespace Ryujinx.Ui _exitEvent = new ManualResetEvent(false); + _gpuCancellationTokenSource = new CancellationTokenSource(); + _hideCursorOnIdle = ConfigurationState.Instance.HideCursorOnIdle; _lastCursorMoveTime = Stopwatch.GetTimestamp(); @@ -387,7 +391,7 @@ namespace Ryujinx.Ui Device.Gpu.Renderer.RunLoop(() => { Device.Gpu.SetGpuThread(); - Device.Gpu.InitializeShaderCache(); + Device.Gpu.InitializeShaderCache(_gpuCancellationTokenSource.Token); Translator.IsReadyForTranslation.Set(); (Toplevel as MainWindow)?.ActivatePauseMenu(); @@ -499,6 +503,8 @@ namespace Ryujinx.Ui return; } + _gpuCancellationTokenSource.Cancel(); + _isStopped = true; _isActive = false; @@ -603,7 +609,7 @@ namespace Ryujinx.Ui if (currentHotkeyState.HasFlag(KeyboardHotkeyState.ToggleMute) && !_prevHotkeyState.HasFlag(KeyboardHotkeyState.ToggleMute)) { - if (Device.IsAudioMuted()) + if (Device.IsAudioMuted()) { Device.SetVolume(ConfigurationState.Instance.System.AudioVolume); } diff --git a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs index 0e3b4892f..8bf8af36c 100644 --- a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs +++ b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs @@ -172,7 +172,7 @@ namespace Ryujinx.Ui.Widgets ResponseType response = (ResponseType)fileChooser.Run(); string destination = fileChooser.Filename; - + fileChooser.Dispose(); if (response == ResponseType.Accept) @@ -490,7 +490,7 @@ namespace Ryujinx.Ui.Widgets private void OpenPtcDir_Clicked(object sender, EventArgs args) { string ptcDir = System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "cpu"); - + string mainPath = System.IO.Path.Combine(ptcDir, "0"); string backupPath = System.IO.Path.Combine(ptcDir, "1"); @@ -515,7 +515,7 @@ namespace 
Ryujinx.Ui.Widgets OpenHelper.OpenFolder(shaderCacheDir); } - + private void PurgePtcCache_Clicked(object sender, EventArgs args) { DirectoryInfo mainDir = new DirectoryInfo(System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "cpu", "0")); @@ -526,7 +526,7 @@ namespace Ryujinx.Ui.Widgets List cacheFiles = new List(); if (mainDir.Exists) - { + { cacheFiles.AddRange(mainDir.EnumerateFiles("*.cache")); } @@ -539,9 +539,9 @@ namespace Ryujinx.Ui.Widgets { foreach (FileInfo file in cacheFiles) { - try - { - file.Delete(); + try + { + file.Delete(); } catch(Exception e) { @@ -557,18 +557,21 @@ namespace Ryujinx.Ui.Widgets { DirectoryInfo shaderCacheDir = new DirectoryInfo(System.IO.Path.Combine(AppDataManager.GamesDirPath, _titleIdText, "cache", "shader")); - MessageDialog warningDialog = GtkDialog.CreateConfirmationDialog("Warning", $"You are about to delete the shader cache for :\n\n{_titleName}\n\nAre you sure you want to proceed?"); + using MessageDialog warningDialog = GtkDialog.CreateConfirmationDialog("Warning", $"You are about to delete the shader cache for :\n\n{_titleName}\n\nAre you sure you want to proceed?"); - List cacheDirectory = new List(); + List oldCacheDirectories = new List(); + List newCacheFiles = new List(); if (shaderCacheDir.Exists) { - cacheDirectory.AddRange(shaderCacheDir.EnumerateDirectories("*")); + oldCacheDirectories.AddRange(shaderCacheDir.EnumerateDirectories("*")); + newCacheFiles.AddRange(shaderCacheDir.GetFiles("*.toc")); + newCacheFiles.AddRange(shaderCacheDir.GetFiles("*.data")); } - if (cacheDirectory.Count > 0 && warningDialog.Run() == (int)ResponseType.Yes) + if ((oldCacheDirectories.Count > 0 || newCacheFiles.Count > 0) && warningDialog.Run() == (int)ResponseType.Yes) { - foreach (DirectoryInfo directory in cacheDirectory) + foreach (DirectoryInfo directory in oldCacheDirectories) { try { @@ -579,9 +582,19 @@ namespace Ryujinx.Ui.Widgets GtkDialog.CreateErrorDialog($"Error purging shader cache at 
{directory.Name}: {e}"); } } - } - warningDialog.Dispose(); + foreach (FileInfo file in newCacheFiles) + { + try + { + file.Delete(); + } + catch (Exception e) + { + GtkDialog.CreateErrorDialog($"Error purging shader cache at {file.Name}: {e}"); + } + } + } } } }