From 3763e2cc10b0bef5a3d27e31ff0ba8180bca74c2 Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Sun, 23 Jun 2024 18:59:45 +0100
Subject: [PATCH] WIP sparse stuff

---
 src/ARMeilleure/Common/AddressTable.cs        | 205 +++++++++++++++---
 .../Instructions/InstEmitFlowHelper.cs        |  13 +-
 src/ARMeilleure/Translation/PTC/Ptc.cs        |   7 +-
 src/ARMeilleure/Translation/Translator.cs     |   8 +-
 .../Arm32/Target/Arm64/InstEmitFlow.cs        |   9 -
 .../Arm64/Target/Arm64/InstEmitSystem.cs      |   6 -
 src/Ryujinx.Cpu/LightningJit/Translator.cs    |   8 +-
 src/Ryujinx.Memory/SparseMemoryBlock.cs       | 120 ++++++++++
 8 files changed, 303 insertions(+), 73 deletions(-)
 create mode 100644 src/Ryujinx.Memory/SparseMemoryBlock.cs

diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs
index 5b6d48bbc..ebe5dfb01 100644
--- a/src/ARMeilleure/Common/AddressTable.cs
+++ b/src/ARMeilleure/Common/AddressTable.cs
@@ -1,7 +1,10 @@
 using ARMeilleure.Diagnostics;
+using Ryujinx.Memory;
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Runtime.InteropServices;
+using System.Threading;
 
 namespace ARMeilleure.Common
 {
@@ -11,6 +14,12 @@ namespace ARMeilleure.Common
     /// <typeparam name="TEntry">Type of the value</typeparam>
     public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
     {
+        /// <summary>
+        /// If true, the sparse 2-level table should be used to improve performance.
+        /// If false, the platform either doesn't properly support it or would be negatively impacted by it.
+        /// </summary>
+        public static bool UseSparseTable => true;
+
         /// <summary>
         /// Represents a level in an <see cref="AddressTable{TEntry}"/>.
         /// </summary>
@@ -53,12 +62,33 @@ namespace ARMeilleure.Common
             }
         }
 
+        private readonly struct AddressTablePage
+        {
+            public readonly bool IsSparse;
+            public readonly IntPtr Address;
+
+            public AddressTablePage(bool isSparse, IntPtr address)
+            {
+                IsSparse = isSparse;
+                Address = address;
+            }
+        }
+
         private bool _disposed;
         private TEntry** _table;
-        private readonly List<IntPtr> _pages;
-        private readonly TEntry* _fallbackTable;
+        private readonly List<AddressTablePage> _pages;
         private TEntry _fill;
 
+        private readonly bool _sparse;
+        private readonly MemoryBlock _sparseFill;
+        private readonly SparseMemoryBlock _fillBottomLevel;
+        private readonly TEntry* _fillBottomLevelPtr;
+
+        private readonly List<SparseMemoryBlock> _sparseReserved;
+        private readonly ulong _sparseBlockSize;
+        private readonly ReaderWriterLockSlim _sparseLock;
+        private ulong _sparseReservedOffset;
+
         /// <summary>
         /// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance.
         /// </summary>
@@ -80,8 +110,7 @@ namespace ARMeilleure.Common
             }
             set
             {
-                *_fallbackTable = value;
-                _fill = value;
+                UpdateFill(value);
             }
         }
 
@@ -102,26 +131,15 @@ namespace ARMeilleure.Common
             }
         }
 
-        /// <summary>
-        /// Gets a pointer to a single entry table containing only the leaf fill value.
-        /// </summary>
-        public IntPtr Fallback
-        {
-            get
-            {
-                ObjectDisposedException.ThrowIf(_disposed, this);
-
-                return (IntPtr)_fallbackTable;
-            }
-        }
-
         /// <summary>
         /// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of
        /// <see cref="Level"/>.
         /// </summary>
+        /// <param name="levels">Levels for the address table</param>
+        /// <param name="sparse">True if the bottom page should be sparsely mapped</param>
         /// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception>
         /// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
-        public AddressTable(Level[] levels)
+        public AddressTable(Level[] levels, bool sparse)
         {
             ArgumentNullException.ThrowIfNull(levels);
 
@@ -130,7 +148,7 @@ namespace ARMeilleure.Common
                 throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels));
             }
 
-            _pages = new List<IntPtr>(capacity: 16);
+            _pages = new List<AddressTablePage>(capacity: 16);
 
             Levels = levels;
             Mask = 0;
@@ -140,7 +158,35 @@ namespace ARMeilleure.Common
                 Mask |= level.Mask;
             }
 
-            _fallbackTable = (TEntry*)NativeAllocator.Instance.Allocate((ulong)sizeof(TEntry));
+            _sparse = sparse;
+
+            if (sparse)
+            {
+                // If the address table is sparse, allocate a fill block.
+
+                _sparseFill = new MemoryBlock(65536, MemoryAllocationFlags.Mirrorable);
+
+                ulong bottomLevelSize = (1ul << levels.Last().Length) * (ulong)sizeof(TEntry);
+
+                _fillBottomLevel = new SparseMemoryBlock(bottomLevelSize, null, _sparseFill);
+                _fillBottomLevelPtr = (TEntry*)_fillBottomLevel.Block.Pointer;
+
+                _sparseReserved = new List<SparseMemoryBlock>();
+                _sparseLock = new ReaderWriterLockSlim();
+
+                _sparseBlockSize = bottomLevelSize << 3;
+            }
+        }
+
+        private void UpdateFill(TEntry fillValue)
+        {
+            if (_sparseFill != null)
+            {
+                Span<byte> span = _sparseFill.GetSpan(0, (int)_sparseFill.Size);
+                MemoryMarshal.Cast<byte, TEntry>(span).Fill(fillValue);
+            }
+
+            _fill = fillValue;
         }
 
         /// <summary>
@@ -172,7 +218,13 @@ namespace ARMeilleure.Common
 
             lock (_pages)
             {
-                return ref GetPage(address)[Levels[^1].GetValue(address)];
+                TEntry* page = GetPage(address);
+
+                int index = Levels[^1].GetValue(address);
+
+                EnsureMapped((IntPtr)(page + index));
+
+                return ref page[index];
             }
         }
 
@@ -190,13 +242,18 @@ namespace ARMeilleure.Common
                 ref Level level = ref Levels[i];
                 ref TEntry* nextPage = ref page[level.GetValue(address)];
 
-                if (nextPage == null)
+                if (nextPage == null || nextPage == _fillBottomLevelPtr)
                 {
                     ref Level nextLevel = ref Levels[i + 1];
 
-                    nextPage = i == Levels.Length - 2 ?
-                        (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) :
-                        (TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false);
+                    if (i == Levels.Length - 2)
+                    {
+                        nextPage = (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true);
+                    }
+                    else
+                    {
+                        nextPage = (TEntry*)Allocate(1 << nextLevel.Length, GetFillValue(i), leaf: false);
+                    }
                 }
 
                 page = (TEntry**)nextPage;
@@ -205,6 +262,46 @@ namespace ARMeilleure.Common
             return (TEntry*)page;
         }
 
+        private void EnsureMapped(IntPtr ptr)
+        {
+            if (_sparse)
+            {
+                // Check sparse allocations to see if the pointer is in any of them.
+                // Ensure the page is committed if there's a match.
+
+                _sparseLock.EnterReadLock();
+
+                try
+                {
+                    foreach (SparseMemoryBlock sparse in _sparseReserved)
+                    {
+                        if (ptr >= sparse.Block.Pointer && ptr < sparse.Block.Pointer + (IntPtr)sparse.Block.Size)
+                        {
+                            sparse.EnsureMapped((ulong)(ptr - sparse.Block.Pointer));
+
+                            break;
+                        }
+                    }
+                }
+                finally
+                {
+                    _sparseLock.ExitReadLock();
+                }
+            }
+        }
+
+        private IntPtr GetFillValue(int level)
+        {
+            if (_fillBottomLevel != null && level == Levels.Length - 2)
+            {
+                return (IntPtr)_fillBottomLevelPtr;
+            }
+            else
+            {
+                return IntPtr.Zero;
+            }
+        }
+
         /// <summary>
         /// Lazily initialize and get the root page of the <see cref="AddressTable{TEntry}"/>.
         /// </summary>
@@ -213,12 +310,17 @@ namespace ARMeilleure.Common
         {
             if (_table == null)
             {
-                _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false);
+                _table = (TEntry**)Allocate(1 << Levels[0].Length, GetFillValue(0), leaf: false);
             }
 
             return _table;
         }
 
+        private void InitLeafPage(Span<byte> page)
+        {
+            MemoryMarshal.Cast<byte, TEntry>(page).Fill(_fill);
+        }
+
         /// <summary>
         /// Allocates a block of memory of the specified type and length.
         /// </summary>
@@ -230,16 +332,42 @@ namespace ARMeilleure.Common
         private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged
         {
             var size = sizeof(T) * length;
-            var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
-            var span = new Span<T>((void*)page, length);
 
-            span.Fill(fill);
+            AddressTablePage page;
+
+            if (_sparse && leaf)
+            {
+                _sparseLock.EnterWriteLock();
+
+                if (_sparseReserved.Count == 0 || _sparseReservedOffset == _sparseBlockSize)
+                {
+                    _sparseReserved.Add(new SparseMemoryBlock(_sparseBlockSize, InitLeafPage, _sparseFill));
+
+                    _sparseReservedOffset = 0;
+                }
+
+                SparseMemoryBlock block = _sparseReserved.Last();
+
+                page = new AddressTablePage(true, block.Block.Pointer + (IntPtr)_sparseReservedOffset);
+
+                _sparseReservedOffset += (ulong)size;
+
+                _sparseLock.ExitWriteLock();
+            }
+            else
+            {
+                var address = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
+                page = new AddressTablePage(false, address);
+
+                var span = new Span<T>((void*)page.Address, length);
+                span.Fill(fill);
+            }
 
             _pages.Add(page);
 
             TranslatorEventSource.Log.AddressTableAllocated(size, leaf);
 
-            return page;
+            return page.Address;
         }
 
         /// <summary>
@@ -262,10 +390,23 @@ namespace ARMeilleure.Common
             {
                 foreach (var page in _pages)
                 {
-                    Marshal.FreeHGlobal(page);
+                    if (!page.IsSparse)
+                    {
+                        Marshal.FreeHGlobal(page.Address);
+                    }
                 }
 
-                Marshal.FreeHGlobal((IntPtr)_fallbackTable);
+                if (_sparse)
+                {
+                    foreach (SparseMemoryBlock block in _sparseReserved)
+                    {
+                        block.Dispose();
+                    }
+
+                    _fillBottomLevel.Dispose();
+                    _sparseFill.Dispose();
+                    _sparseLock.Dispose();
+                }
 
                 _disposed = true;
             }
diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
index fbfdcefce..2a0355cc3 100644
--- a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
+++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -231,18 +231,7 @@ namespace ARMeilleure.Instructions
                 Const(3)
             );
 
-            // TODO: could possibly make a fallback page that level 1 is filled with that contains dispatch stub on all pages
-            // Would save this load and the comparisons
-            // 16MB of the same value is a bit wasteful so it could replicate with remapping.
-
-            Operand fallback = !context.HasPtc ?
-                Const((long)context.FunctionTable.Fallback) :
-                Const((long)context.FunctionTable.Fallback, Ptc.DispatchFallbackSymbol);
-
-            Operand pageIsZero = context.ICompareEqual(page, Const(0L));
-
-            // Small trick to keep this branchless - if the page is zero, load a fallback table entry that always contains the dispatch stub.
-            hostAddress = context.Load(OperandType.I64, context.ConditionalSelect(pageIsZero, fallback, context.Add(page, index2)));
+            hostAddress = context.Load(OperandType.I64, context.Add(page, index2));
         }
         else
         {
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
index 58ff9b145..59ced5806 100644
--- a/src/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 26950; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 26957; //! To be incremented manually for each change to the ARMeilleure project.
 
         private const string ActualDir = "0";
         private const string BackupDir = "1";
@@ -41,7 +41,6 @@ namespace ARMeilleure.Translation.PTC
         public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
         public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
         public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4);
-        public static readonly Symbol DispatchFallbackSymbol = new(SymbolType.Special, 5);
 
         private const byte FillingByte = 0x00;
         private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
@@ -711,10 +710,6 @@ namespace ARMeilleure.Translation.PTC
             {
                 imm = translator.FunctionTable.Base;
             }
-            else if (symbol == DispatchFallbackSymbol)
-            {
-                imm = translator.FunctionTable.Fallback;
-            }
 
             if (imm == null)
             {
diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs
index c3796cb99..9a3d7cec5 100644
--- a/src/ARMeilleure/Translation/Translator.cs
+++ b/src/ARMeilleure/Translation/Translator.cs
@@ -22,8 +22,6 @@ namespace ARMeilleure.Translation
 {
     public class Translator
     {
-        private const bool UseSparseTable = true;
-
         private static readonly AddressTable<ulong>.Level[] _levels64Bit =
             new AddressTable<ulong>.Level[]
             {
@@ -88,7 +86,9 @@ namespace ARMeilleure.Translation
 
             AddressTable<ulong>.Level[] levels;
 
-            if (UseSparseTable)
+            bool useSparseTable = AddressTable<ulong>.UseSparseTable;
+
+            if (useSparseTable)
             {
                 levels = for64Bits ? _levels64BitSparse : _levels32BitSparse;
             }
@@ -99,7 +99,7 @@ namespace ARMeilleure.Translation
 
             CountTable = new EntryTable<uint>();
             Functions = new TranslatorCache<TranslatedFunction>();
-            FunctionTable = new AddressTable<ulong>(levels);
+            FunctionTable = new AddressTable<ulong>(levels, useSparseTable);
             Stubs = new TranslatorStubs(FunctionTable);
 
             FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs
index d72e039c8..f0b18fcbf 100644
--- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs
+++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs
@@ -214,18 +214,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
             asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length);
             asm.Lsl(indexReg, indexReg, Const(3));
 
-            // Is the page address zero? Make sure to use the fallback if it is.
-            asm.Tst(rn, rn);
-
             // Index into the page.
             asm.Add(rn, rn, indexReg);
 
-            // Reuse the index register for the fallback
-            ulong fallback = (ulong)funcTable.Fallback;
-            asm.Mov(indexReg, fallback);
-
-            asm.Csel(rn, indexReg, rn, ArmCondition.Eq);
-
             // Load the final branch address
             asm.LdrRiUn(rn, rn, 0);
 
diff --git a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs
index c157d752e..dc8fc2c14 100644
--- a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs
+++ b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs
@@ -385,12 +385,6 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
             // Index into the page.
             asm.Add(rn, rn, indexReg);
 
-            // Reuse the index register for the fallback
-            ulong fallback = (ulong)funcTable.Fallback;
-            asm.Mov(indexReg, fallback);
-
-            asm.Csel(rn, indexReg, rn, ArmCondition.Eq);
-
             // Load the final branch address
             asm.LdrRiUn(rn, rn, 0);
 
diff --git a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs
index 8b1b875f4..19f883efa 100644
--- a/src/Ryujinx.Cpu/LightningJit/Translator.cs
+++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs
@@ -16,8 +16,6 @@ namespace Ryujinx.Cpu.LightningJit
 {
     class Translator : IDisposable
     {
-        private const bool UseSparseTable = true;
-
         // Should be enabled on platforms that enforce W^X.
         private static bool IsNoWxPlatform => false;
 
@@ -78,9 +76,11 @@ namespace Ryujinx.Cpu.LightningJit
                 JitCache.Initialize(new JitMemoryAllocator(forJit: true));
             }
 
+            bool useSparseTable = AddressTable<ulong>.UseSparseTable;
+
             AddressTable<ulong>.Level[] levels;
 
-            if (UseSparseTable)
+            if (useSparseTable)
             {
                 levels = for64Bits ? _levels64BitSparse : _levels32BitSparse;
             }
@@ -90,7 +90,7 @@ namespace Ryujinx.Cpu.LightningJit
             }
 
             Functions = new TranslatorCache<TranslatedFunction>();
-            FunctionTable = new AddressTable<ulong>(levels);
+            FunctionTable = new AddressTable<ulong>(levels, useSparseTable);
             Stubs = new TranslatorStubs(FunctionTable, _noWxCache);
 
             FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
diff --git a/src/Ryujinx.Memory/SparseMemoryBlock.cs b/src/Ryujinx.Memory/SparseMemoryBlock.cs
new file mode 100644
index 000000000..8c6dbea86
--- /dev/null
+++ b/src/Ryujinx.Memory/SparseMemoryBlock.cs
@@ -0,0 +1,120 @@
+using Ryujinx.Common;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ryujinx.Memory
+{
+    public delegate void PageInitDelegate(Span<byte> page);
+
+    public class SparseMemoryBlock : IDisposable
+    {
+        private const ulong MapGranularity = 1UL << 17;
+
+        private readonly PageInitDelegate _pageInit;
+
+        private readonly object _lock = new object();
+        private readonly ulong _pageSize;
+        private readonly MemoryBlock _reservedBlock;
+        private readonly List<MemoryBlock> _mappedBlocks;
+        private ulong _mappedBlockUsage;
+
+        private readonly ulong[] _mappedPageBitmap;
+
+        public MemoryBlock Block => _reservedBlock;
+
+        public SparseMemoryBlock(ulong size, PageInitDelegate pageInit, MemoryBlock fill)
+        {
+            _pageSize = MemoryBlock.GetPageSize();
+            _reservedBlock = new MemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.ViewCompatible);
+            _mappedBlocks = new List<MemoryBlock>();
+            _pageInit = pageInit;
+
+            int pages = (int)BitUtils.DivRoundUp(size, _pageSize);
+            int bitmapEntries = BitUtils.DivRoundUp(pages, 64);
+            _mappedPageBitmap = new ulong[bitmapEntries];
+
+            if (fill != null)
+            {
+                // Fill the block with mappings from the fill block.
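+                // Illustrative example (the actual sizes depend on the caller): with a 64 KiB
+                // fill block over a 1 MiB reservation, DivRoundUp gives 16 repeats, so the loop
+                // below maps the same mirrorable fill block at offsets 0, 64 KiB, 128 KiB, ...
+                // Every page of the reservation then reads back the fill contents until
+                // EnsureMapped commits a private page over it.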
+ + if (fill.Size % _pageSize != 0) + { + throw new ArgumentException("Fill memory block should be page aligned.", nameof(fill)); + } + + int repeats = (int)BitUtils.DivRoundUp(size, fill.Size); + + ulong offset = 0; + for (int i = 0; i < repeats; i++) + { + _reservedBlock.MapView(fill, 0, offset, Math.Min(fill.Size, size - offset)); + offset += fill.Size; + } + } + + // If a fill block isn't provided, the pages that aren't EnsureMapped are unmapped. + // The caller can rely on signal handler to fill empty pages instead. + } + + private void MapPage(ulong pageOffset) + { + // Take a page from the latest mapped block. + MemoryBlock block = _mappedBlocks.LastOrDefault(); + + if (block == null || _mappedBlockUsage == MapGranularity) + { + // Need to map some more memory. + + block = new MemoryBlock(MapGranularity, MemoryAllocationFlags.Mirrorable | MemoryAllocationFlags.NoMap); + + _mappedBlocks.Add(block); + + _mappedBlockUsage = 0; + } + + _reservedBlock.MapView(block, _mappedBlockUsage, pageOffset, _pageSize); + _pageInit(_reservedBlock.GetSpan(pageOffset, (int)_pageSize)); + + _mappedBlockUsage += _pageSize; + } + + public void EnsureMapped(ulong offset) + { + int pageIndex = (int)(offset / _pageSize); + int bitmapIndex = pageIndex >> 6; + + ref ulong entry = ref _mappedPageBitmap[bitmapIndex]; + ulong bit = 1UL << (pageIndex & 63); + + if ((entry & bit) == 0) + { + // Not mapped. + + lock (_lock) + { + // Check the bit while locked to make sure that this only happens once. + + if ((entry & bit) == 0) + { + MapPage(offset & ~(_pageSize - 1)); + + entry |= bit; + } + } + } + } + + public void Dispose() + { + _reservedBlock.Dispose(); + + foreach (MemoryBlock block in _mappedBlocks) + { + block.Dispose(); + } + + GC.SuppressFinalize(this); + } + } +}
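
Usage sketch (illustrative, not part of the patch): the snippet below shows the basic
reserve / EnsureMapped flow of the new SparseMemoryBlock. The 16 MiB size, the 0xFF
page-init pattern and the 0x42000 offset are made-up example values.

    using Ryujinx.Memory;
    using System;

    class SparseMemoryBlockExample
    {
        static void Main()
        {
            // Reserve 16 MiB of address space; nothing is committed up front.
            using var sparse = new SparseMemoryBlock(
                16UL << 20,
                page => page.Fill(0xFF), // runs once per page, the first time it is committed
                fill: null);

            // Commit the page containing this offset before touching it directly.
            sparse.EnsureMapped(0x42000);

            // The reservation is now backed by real memory at that page and holds the init pattern.
            Span<byte> span = sparse.Block.GetSpan(0x42000, 16);
            Console.WriteLine(span[0]); // prints 255
            span[0] = 0x12;
        }
    }

With fill == null, as here, pages that were never EnsureMapped stay unmapped, so callers are
expected to either call EnsureMapped first or rely on a signal handler, exactly as the comment
in the constructor describes.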