diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs deleted file mode 100644 index fcab3a202..000000000 --- a/src/ARMeilleure/Common/AddressTable.cs +++ /dev/null @@ -1,252 +0,0 @@ -using ARMeilleure.Diagnostics; -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; - -namespace ARMeilleure.Common -{ - /// - /// Represents a table of guest address to a value. - /// - /// Type of the value - public unsafe class AddressTable : IDisposable where TEntry : unmanaged - { - /// - /// Represents a level in an . - /// - public readonly struct Level - { - /// - /// Gets the index of the in the guest address. - /// - public int Index { get; } - - /// - /// Gets the length of the in the guest address. - /// - public int Length { get; } - - /// - /// Gets the mask which masks the bits used by the . - /// - public ulong Mask => ((1ul << Length) - 1) << Index; - - /// - /// Initializes a new instance of the structure with the specified - /// and . - /// - /// Index of the - /// Length of the - public Level(int index, int length) - { - (Index, Length) = (index, length); - } - - /// - /// Gets the value of the from the specified guest . - /// - /// Guest address - /// Value of the from the specified guest - public int GetValue(ulong address) - { - return (int)((address & Mask) >> Index); - } - } - - private bool _disposed; - private TEntry** _table; - private readonly List _pages; - - /// - /// Gets the bits used by the of the instance. - /// - public ulong Mask { get; } - - /// - /// Gets the s used by the instance. - /// - public Level[] Levels { get; } - - /// - /// Gets or sets the default fill value of newly created leaf pages. - /// - public TEntry Fill { get; set; } - - /// - /// Gets the base address of the . - /// - /// instance was disposed - public IntPtr Base - { - get - { - ObjectDisposedException.ThrowIf(_disposed, this); - - lock (_pages) - { - return (IntPtr)GetRootPage(); - } - } - } - - /// - /// Constructs a new instance of the class with the specified list of - /// . - /// - /// is null - /// Length of is less than 2 - public AddressTable(Level[] levels) - { - ArgumentNullException.ThrowIfNull(levels); - - if (levels.Length < 2) - { - throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels)); - } - - _pages = new List(capacity: 16); - - Levels = levels; - Mask = 0; - - foreach (var level in Levels) - { - Mask |= level.Mask; - } - } - - /// - /// Determines if the specified is in the range of the - /// . - /// - /// Guest address - /// if is valid; otherwise - public bool IsValid(ulong address) - { - return (address & ~Mask) == 0; - } - - /// - /// Gets a reference to the value at the specified guest . - /// - /// Guest address - /// Reference to the value at the specified guest - /// instance was disposed - /// is not mapped - public ref TEntry GetValue(ulong address) - { - ObjectDisposedException.ThrowIf(_disposed, this); - - if (!IsValid(address)) - { - throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address)); - } - - lock (_pages) - { - return ref GetPage(address)[Levels[^1].GetValue(address)]; - } - } - - /// - /// Gets the leaf page for the specified guest . 
- /// - /// Guest address - /// Leaf page for the specified guest - private TEntry* GetPage(ulong address) - { - TEntry** page = GetRootPage(); - - for (int i = 0; i < Levels.Length - 1; i++) - { - ref Level level = ref Levels[i]; - ref TEntry* nextPage = ref page[level.GetValue(address)]; - - if (nextPage == null) - { - ref Level nextLevel = ref Levels[i + 1]; - - nextPage = i == Levels.Length - 2 ? - (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) : - (TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false); - } - - page = (TEntry**)nextPage; - } - - return (TEntry*)page; - } - - /// - /// Lazily initialize and get the root page of the . - /// - /// Root page of the - private TEntry** GetRootPage() - { - if (_table == null) - { - _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false); - } - - return _table; - } - - /// - /// Allocates a block of memory of the specified type and length. - /// - /// Type of elements - /// Number of elements - /// Fill value - /// if leaf; otherwise - /// Allocated block - private IntPtr Allocate(int length, T fill, bool leaf) where T : unmanaged - { - var size = sizeof(T) * length; - var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size); - var span = new Span((void*)page, length); - - span.Fill(fill); - - _pages.Add(page); - - TranslatorEventSource.Log.AddressTableAllocated(size, leaf); - - return page; - } - - /// - /// Releases all resources used by the instance. - /// - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - /// - /// Releases all unmanaged and optionally managed resources used by the - /// instance. - /// - /// to dispose managed resources also; otherwise just unmanaged resouces - protected virtual void Dispose(bool disposing) - { - if (!_disposed) - { - foreach (var page in _pages) - { - Marshal.FreeHGlobal(page); - } - - _disposed = true; - } - } - - /// - /// Frees resources used by the instance. - /// - ~AddressTable() - { - Dispose(false); - } - } -} diff --git a/src/ARMeilleure/Common/AddressTableLevel.cs b/src/ARMeilleure/Common/AddressTableLevel.cs new file mode 100644 index 000000000..6107726ee --- /dev/null +++ b/src/ARMeilleure/Common/AddressTableLevel.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Common +{ + /// + /// Represents a level in an . + /// + public readonly struct AddressTableLevel + { + /// + /// Gets the index of the in the guest address. + /// + public int Index { get; } + + /// + /// Gets the length of the in the guest address. + /// + public int Length { get; } + + /// + /// Gets the mask which masks the bits used by the . + /// + public ulong Mask => ((1ul << Length) - 1) << Index; + + /// + /// Initializes a new instance of the structure with the specified + /// and . + /// + /// Index of the + /// Length of the + public AddressTableLevel(int index, int length) + { + (Index, Length) = (index, length); + } + + /// + /// Gets the value of the from the specified guest . 
+ /// + /// Guest address + /// Value of the from the specified guest + public int GetValue(ulong address) + { + return (int)((address & Mask) >> Index); + } + } +} diff --git a/src/ARMeilleure/Common/AddressTablePresets.cs b/src/ARMeilleure/Common/AddressTablePresets.cs new file mode 100644 index 000000000..e7eaf62cd --- /dev/null +++ b/src/ARMeilleure/Common/AddressTablePresets.cs @@ -0,0 +1,51 @@ +namespace ARMeilleure.Common +{ + public static class AddressTablePresets + { + private static readonly AddressTableLevel[] _levels64Bit = + new AddressTableLevel[] + { + new(31, 17), + new(23, 8), + new(15, 8), + new( 7, 8), + new( 2, 5), + }; + + private static readonly AddressTableLevel[] _levels32Bit = + new AddressTableLevel[] + { + new(31, 17), + new(23, 8), + new(15, 8), + new( 7, 8), + new( 1, 6), + }; + + private static readonly AddressTableLevel[] _levels64BitSparse = + new AddressTableLevel[] + { + new(23, 16), + new( 2, 21), + }; + + private static readonly AddressTableLevel[] _levels32BitSparse = + new AddressTableLevel[] + { + new(22, 10), + new( 1, 21), + }; + + public static AddressTableLevel[] GetArmPreset(bool for64Bits, bool sparse) + { + if (sparse) + { + return for64Bits ? _levels64BitSparse : _levels32BitSparse; + } + else + { + return for64Bits ? _levels64Bit : _levels32Bit; + } + } + } +} diff --git a/src/ARMeilleure/Common/Allocator.cs b/src/ARMeilleure/Common/Allocator.cs index 6905a614f..de6a77ebe 100644 --- a/src/ARMeilleure/Common/Allocator.cs +++ b/src/ARMeilleure/Common/Allocator.cs @@ -2,7 +2,7 @@ using System; namespace ARMeilleure.Common { - unsafe abstract class Allocator : IDisposable + public unsafe abstract class Allocator : IDisposable { public T* Allocate(ulong count = 1) where T : unmanaged { diff --git a/src/ARMeilleure/Common/IAddressTable.cs b/src/ARMeilleure/Common/IAddressTable.cs new file mode 100644 index 000000000..116ccdaad --- /dev/null +++ b/src/ARMeilleure/Common/IAddressTable.cs @@ -0,0 +1,51 @@ +using System; + +namespace ARMeilleure.Common +{ + public interface IAddressTable : IDisposable where TEntry : unmanaged + { + /// + /// True if the address table's bottom level is sparsely mapped. + /// This also ensures the second bottom level is filled with a dummy page rather than 0. + /// + bool Sparse { get; } + + /// + /// Gets the bits used by the of the instance. + /// + ulong Mask { get; } + + /// + /// Gets the s used by the instance. + /// + AddressTableLevel[] Levels { get; } + + /// + /// Gets or sets the default fill value of newly created leaf pages. + /// + TEntry Fill { get; set; } + + /// + /// Gets the base address of the . + /// + /// instance was disposed + IntPtr Base { get; } + + /// + /// Determines if the specified is in the range of the + /// . + /// + /// Guest address + /// if is valid; otherwise + bool IsValid(ulong address); + + /// + /// Gets a reference to the value at the specified guest . 
+ /// + /// Guest address + /// Reference to the value at the specified guest + /// instance was disposed + /// is not mapped + ref TEntry GetValue(ulong address); + } +} diff --git a/src/ARMeilleure/Common/NativeAllocator.cs b/src/ARMeilleure/Common/NativeAllocator.cs index 93c48adda..102e33ebe 100644 --- a/src/ARMeilleure/Common/NativeAllocator.cs +++ b/src/ARMeilleure/Common/NativeAllocator.cs @@ -3,7 +3,7 @@ using System.Runtime.InteropServices; namespace ARMeilleure.Common { - unsafe sealed class NativeAllocator : Allocator + public unsafe sealed class NativeAllocator : Allocator { public static NativeAllocator Instance { get; } = new(); diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 2009bafda..ab8e1e3bd 100644 --- a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -193,6 +193,8 @@ namespace ARMeilleure.Instructions Operand hostAddress; + var table = context.FunctionTable; + // If address is mapped onto the function table, we can skip the table walk. Otherwise we fallback // onto the dispatch stub. if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value)) @@ -203,6 +205,36 @@ namespace ARMeilleure.Instructions hostAddress = context.Load(OperandType.I64, hostAddressAddr); } + else if (table.Sparse && table.Levels.Length == 2) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + // Deliberately attempts to avoid branches. + + var level0 = table.Levels[0]; + int clearBits0 = 64 - (level0.Index + level0.Length); + + Operand index = context.ShiftLeft( + context.ShiftRightUI(context.ShiftLeft(guestAddress, Const(clearBits0)), Const(clearBits0 + level0.Index)), + Const(3) + ); + + Operand tableBase = !context.HasPtc ? + Const(table.Base) : + Const(table.Base, Ptc.FunctionTableSymbol); + + Operand page = context.Load(OperandType.I64, context.Add(tableBase, index)); + + // Second level + var level1 = table.Levels[1]; + int clearBits1 = 64 - (level1.Index + level1.Length); + + Operand index2 = context.ShiftLeft( + context.ShiftRightUI(context.ShiftLeft(guestAddress, Const(clearBits1)), Const(clearBits1 + level1.Index)), + Const(3) + ); + + hostAddress = context.Load(OperandType.I64, context.Add(page, index2)); + } else { hostAddress = !context.HasPtc ? 
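Note (review annotation, not part of the patch): the sketch below spells out, in plain C#, the walk this IR sequence emits for the sparse 64-bit preset (levels (23, 16) and (2, 21) in AddressTablePresets). The shift-left/shift-right pair isolates a bit field without needing a mask constant, and the final shift by 3 converts the entry index into a byte offset, entries being 8-byte pointers. No bounds checks or branches are needed because, in sparse mode, every reachable slot is pre-filled, ultimately resolving to the slow dispatch stub.

```csharp
// Editorial sketch: the arithmetic equivalent of the emitted lookup for the
// sparse 64-bit preset. Level values are taken from AddressTablePresets.
static unsafe ulong InlineLookup(byte* tableBase, ulong guestAddress)
{
    // Level 0: isolate bits [23, 39) of the address, scaled by 8.
    // Equivalent to ((guestAddress >> 23) & 0xFFFF) << 3.
    int clearBits0 = 64 - (23 + 16);
    ulong index0 = ((guestAddress << clearBits0) >> (clearBits0 + 23)) << 3;

    // Load the bottom-level page pointer from the root table.
    byte* page = *(byte**)(tableBase + index0);

    // Level 1: isolate bits [2, 23), scaled by 8.
    int clearBits1 = 64 - (2 + 21);
    ulong index1 = ((guestAddress << clearBits1) >> (clearBits1 + 2)) << 3;

    // Host function pointer, or the slow dispatch stub when untranslated.
    return *(ulong*)(page + index1);
}
```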
diff --git a/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs b/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs
index 2ec5bc1b3..896d372d1 100644
--- a/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs
+++ b/src/ARMeilleure/Signal/NativeSignalHandlerGenerator.cs
@@ -8,7 +8,7 @@ namespace ARMeilleure.Signal
 {
     public static class NativeSignalHandlerGenerator
     {
-        public const int MaxTrackedRanges = 8;
+        public const int MaxTrackedRanges = 16;
 
         private const int StructAddressOffset = 0;
         private const int StructWriteOffset = 4;
diff --git a/src/ARMeilleure/Translation/ArmEmitterContext.cs b/src/ARMeilleure/Translation/ArmEmitterContext.cs
index e24074739..54cd97d53 100644
--- a/src/ARMeilleure/Translation/ArmEmitterContext.cs
+++ b/src/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -46,7 +46,7 @@ namespace ARMeilleure.Translation
         public IMemoryManager Memory { get; }
 
         public EntryTable<uint> CountTable { get; }
-        public AddressTable<ulong> FunctionTable { get; }
+        public IAddressTable<ulong> FunctionTable { get; }
         public TranslatorStubs Stubs { get; }
 
         public ulong EntryAddress { get; }
@@ -62,7 +62,7 @@ namespace ARMeilleure.Translation
         public ArmEmitterContext(
             IMemoryManager memory,
             EntryTable<uint> countTable,
-            AddressTable<ulong> funcTable,
+            IAddressTable<ulong> funcTable,
             TranslatorStubs stubs,
             ulong entryAddress,
             bool highCq,
diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs
index c2eed7a55..559a629c2 100644
--- a/src/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/src/ARMeilleure/Translation/PTC/Ptc.cs
@@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
         private const string OuterHeaderMagicString = "PTCohd\0\0";
         private const string InnerHeaderMagicString = "PTCihd\0\0";
 
-        private const uint InternalVersion = 6950; //! To be incremented manually for each change to the ARMeilleure project.
+        private const uint InternalVersion = 6978; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -40,6 +40,7 @@ namespace ARMeilleure.Translation.PTC public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1); public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2); public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3); + public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4); private const byte FillingByte = 0x00; private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; @@ -705,6 +706,10 @@ namespace ARMeilleure.Translation.PTC { imm = translator.Stubs.DispatchStub; } + else if (symbol == FunctionTableSymbol) + { + imm = translator.FunctionTable.Base; + } if (imm == null) { diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs index 014b12035..45758059c 100644 --- a/src/ARMeilleure/Translation/Translator.cs +++ b/src/ARMeilleure/Translation/Translator.cs @@ -22,33 +22,13 @@ namespace ARMeilleure.Translation { public class Translator { - private static readonly AddressTable.Level[] _levels64Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 2, 5), - }; - - private static readonly AddressTable.Level[] _levels32Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 1, 6), - }; - private readonly IJitMemoryAllocator _allocator; private readonly ConcurrentQueue> _oldFuncs; private readonly Ptc _ptc; internal TranslatorCache Functions { get; } - internal AddressTable FunctionTable { get; } + internal IAddressTable FunctionTable { get; } internal EntryTable CountTable { get; } internal TranslatorStubs Stubs { get; } internal TranslatorQueue Queue { get; } @@ -57,7 +37,7 @@ namespace ARMeilleure.Translation private Thread[] _backgroundTranslationThreads; private volatile int _threadCount; - public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits) + public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, IAddressTable functionTable) { _allocator = allocator; Memory = memory; @@ -72,7 +52,7 @@ namespace ARMeilleure.Translation CountTable = new EntryTable(); Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? 
_levels64Bit : _levels32Bit); + FunctionTable = functionTable; Stubs = new TranslatorStubs(FunctionTable); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; diff --git a/src/ARMeilleure/Translation/TranslatorStubs.cs b/src/ARMeilleure/Translation/TranslatorStubs.cs index d80823a8b..379caa283 100644 --- a/src/ARMeilleure/Translation/TranslatorStubs.cs +++ b/src/ARMeilleure/Translation/TranslatorStubs.cs @@ -19,7 +19,7 @@ namespace ARMeilleure.Translation private bool _disposed; - private readonly AddressTable _functionTable; + private readonly IAddressTable _functionTable; private readonly Lazy _dispatchStub; private readonly Lazy _dispatchLoop; private readonly Lazy _contextWrapper; @@ -86,7 +86,7 @@ namespace ARMeilleure.Translation /// /// Function table used to store pointers to the functions that the guest code will call /// is null - public TranslatorStubs(AddressTable functionTable) + public TranslatorStubs(IAddressTable functionTable) { ArgumentNullException.ThrowIfNull(functionTable); diff --git a/src/Ryujinx.Cpu/AddressTable.cs b/src/Ryujinx.Cpu/AddressTable.cs new file mode 100644 index 000000000..828e01597 --- /dev/null +++ b/src/Ryujinx.Cpu/AddressTable.cs @@ -0,0 +1,486 @@ +using ARMeilleure.Memory; +using Ryujinx.Common; +using Ryujinx.Cpu.Signal; +using Ryujinx.Memory; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Threading; +using static Ryujinx.Cpu.MemoryEhMeilleure; + +namespace ARMeilleure.Common +{ + /// + /// Represents a table of guest address to a value. + /// + /// Type of the value + public unsafe class AddressTable : IAddressTable where TEntry : unmanaged + { + /// + /// Represents a page of the address table. + /// + private readonly struct AddressTablePage + { + /// + /// True if the allocation belongs to a sparse block, false otherwise. + /// + public readonly bool IsSparse; + + /// + /// Base address for the page. + /// + public readonly IntPtr Address; + + public AddressTablePage(bool isSparse, IntPtr address) + { + IsSparse = isSparse; + Address = address; + } + } + + /// + /// A sparsely mapped block of memory with a signal handler to map pages as they're accessed. 
+ /// + private readonly struct TableSparseBlock : IDisposable + { + public readonly SparseMemoryBlock Block; + private readonly TrackingEventDelegate _trackingEvent; + + public TableSparseBlock(ulong size, Action ensureMapped, PageInitDelegate pageInit) + { + var block = new SparseMemoryBlock(size, pageInit, null); + + _trackingEvent = (ulong address, ulong size, bool write) => + { + ulong pointer = (ulong)block.Block.Pointer + address; + + ensureMapped((IntPtr)pointer); + + return pointer; + }; + + bool added = NativeSignalHandler.AddTrackedRegion( + (nuint)block.Block.Pointer, + (nuint)(block.Block.Pointer + (IntPtr)block.Block.Size), + Marshal.GetFunctionPointerForDelegate(_trackingEvent)); + + if (!added) + { + throw new InvalidOperationException("Number of allowed tracked regions exceeded."); + } + + Block = block; + } + + public void Dispose() + { + NativeSignalHandler.RemoveTrackedRegion((nuint)Block.Block.Pointer); + + Block.Dispose(); + } + } + + private bool _disposed; + private TEntry** _table; + private readonly List _pages; + private TEntry _fill; + + private readonly MemoryBlock _sparseFill; + private readonly SparseMemoryBlock _fillBottomLevel; + private readonly TEntry* _fillBottomLevelPtr; + + private readonly List _sparseReserved; + private readonly ReaderWriterLockSlim _sparseLock; + + private ulong _sparseBlockSize; + private ulong _sparseReservedOffset; + + public bool Sparse { get; } + + /// + public ulong Mask { get; } + + /// + public AddressTableLevel[] Levels { get; } + + /// + public TEntry Fill + { + get + { + return _fill; + } + set + { + UpdateFill(value); + } + } + + /// + public IntPtr Base + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_pages) + { + return (IntPtr)GetRootPage(); + } + } + } + + /// + /// Constructs a new instance of the class with the specified list of + /// . + /// + /// Levels for the address table + /// True if the bottom page should be sparsely mapped + /// is null + /// Length of is less than 2 + public AddressTable(AddressTableLevel[] levels, bool sparse) + { + ArgumentNullException.ThrowIfNull(levels); + + if (levels.Length < 2) + { + throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels)); + } + + _pages = new List(capacity: 16); + + Levels = levels; + Mask = 0; + + foreach (var level in Levels) + { + Mask |= level.Mask; + } + + Sparse = sparse; + + if (sparse) + { + // If the address table is sparse, allocate a fill block + + _sparseFill = new MemoryBlock(65536, MemoryAllocationFlags.Mirrorable); + + ulong bottomLevelSize = (1ul << levels.Last().Length) * (ulong)sizeof(TEntry); + + _fillBottomLevel = new SparseMemoryBlock(bottomLevelSize, null, _sparseFill); + _fillBottomLevelPtr = (TEntry*)_fillBottomLevel.Block.Pointer; + + _sparseReserved = new List(); + _sparseLock = new ReaderWriterLockSlim(); + + _sparseBlockSize = bottomLevelSize; + } + } + + /// + /// Create an instance for an ARM function table. + /// Selects the best table structure for A32/A64, taking into account the selected memory manager type. + /// + /// True if the guest is A64, false otherwise + /// Memory manager type + /// An for ARM function lookup + public static AddressTable CreateForArm(bool for64Bits, MemoryManagerType type) + { + // Assume software memory means that we don't want to use any signal handlers. 
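+            // Sparse mode also relies on NativeSignalHandler to commit table pages on
+            // first access, and the JIT contexts only install that handler for host
+            // mapped or tracked memory types.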
+ bool sparse = type != MemoryManagerType.SoftwareMmu && type != MemoryManagerType.SoftwarePageTable; + + return new AddressTable(AddressTablePresets.GetArmPreset(for64Bits, sparse), sparse); + } + + /// + /// Update the fill value for the bottom level of the table. + /// + /// New fill value + private void UpdateFill(TEntry fillValue) + { + if (_sparseFill != null) + { + Span span = _sparseFill.GetSpan(0, (int)_sparseFill.Size); + MemoryMarshal.Cast(span).Fill(fillValue); + } + + _fill = fillValue; + } + + /// + /// Signal that the given code range exists. + /// + /// + /// + public void SignalCodeRange(ulong address, ulong size) + { + AddressTableLevel bottom = Levels.Last(); + ulong bottomLevelEntries = 1ul << bottom.Length; + + ulong entryIndex = address >> bottom.Index; + ulong entries = size >> bottom.Index; + entries += entryIndex - BitUtils.AlignDown(entryIndex, bottomLevelEntries); + + _sparseBlockSize = Math.Max(_sparseBlockSize, BitUtils.AlignUp(entries, bottomLevelEntries) * (ulong)sizeof(TEntry)); + } + + /// + public bool IsValid(ulong address) + { + return (address & ~Mask) == 0; + } + + /// + public ref TEntry GetValue(ulong address) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + if (!IsValid(address)) + { + throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address)); + } + + lock (_pages) + { + TEntry* page = GetPage(address); + + int index = Levels[^1].GetValue(address); + + EnsureMapped((IntPtr)(page + index)); + + return ref page[index]; + } + } + + /// + /// Gets the leaf page for the specified guest . + /// + /// Guest address + /// Leaf page for the specified guest + private TEntry* GetPage(ulong address) + { + TEntry** page = GetRootPage(); + + for (int i = 0; i < Levels.Length - 1; i++) + { + ref AddressTableLevel level = ref Levels[i]; + ref TEntry* nextPage = ref page[level.GetValue(address)]; + + if (nextPage == null || nextPage == _fillBottomLevelPtr) + { + ref AddressTableLevel nextLevel = ref Levels[i + 1]; + + if (i == Levels.Length - 2) + { + nextPage = (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true); + } + else + { + nextPage = (TEntry*)Allocate(1 << nextLevel.Length, GetFillValue(i), leaf: false); + } + } + + page = (TEntry**)nextPage; + } + + return (TEntry*)page; + } + + /// + /// Ensure the given pointer is mapped in any overlapping sparse reservations. + /// + /// Pointer to be mapped + private void EnsureMapped(IntPtr ptr) + { + if (Sparse) + { + // Check sparse allocations to see if the pointer is in any of them. + // Ensure the page is committed if there's a match. + + _sparseLock.EnterReadLock(); + + try + { + foreach (TableSparseBlock reserved in _sparseReserved) + { + SparseMemoryBlock sparse = reserved.Block; + + if (ptr >= sparse.Block.Pointer && ptr < sparse.Block.Pointer + (IntPtr)sparse.Block.Size) + { + sparse.EnsureMapped((ulong)(ptr - sparse.Block.Pointer)); + + break; + } + } + } + finally + { + _sparseLock.ExitReadLock(); + } + } + } + + /// + /// Get the fill value for a non-leaf level of the table. + /// + /// Level to get the fill value for + /// The fill value + private IntPtr GetFillValue(int level) + { + if (_fillBottomLevel != null && level == Levels.Length - 2) + { + return (IntPtr)_fillBottomLevelPtr; + } + else + { + return IntPtr.Zero; + } + } + + /// + /// Lazily initialize and get the root page of the . 
+ /// + /// Root page of the + private TEntry** GetRootPage() + { + if (_table == null) + { + _table = (TEntry**)Allocate(1 << Levels[0].Length, GetFillValue(0), leaf: false); + } + + return _table; + } + + /// + /// Initialize a leaf page with the fill value. + /// + /// Page to initialize + private void InitLeafPage(Span page) + { + MemoryMarshal.Cast(page).Fill(_fill); + } + + /// + /// Reserve a new sparse block, and add it to the list. + /// + /// The new sparse block that was added + private TableSparseBlock ReserveNewSparseBlock() + { + var block = new TableSparseBlock(_sparseBlockSize, EnsureMapped, InitLeafPage); + + _sparseReserved.Add(block); + _sparseReservedOffset = 0; + + return block; + } + + /// + /// Allocates a block of memory of the specified type and length. + /// + /// Type of elements + /// Number of elements + /// Fill value + /// if leaf; otherwise + /// Allocated block + private IntPtr Allocate(int length, T fill, bool leaf) where T : unmanaged + { + var size = sizeof(T) * length; + + AddressTablePage page; + + if (Sparse && leaf) + { + _sparseLock.EnterWriteLock(); + + SparseMemoryBlock block; + + if (_sparseReserved.Count == 0) + { + block = ReserveNewSparseBlock().Block; + } + else + { + block = _sparseReserved.Last().Block; + + if (_sparseReservedOffset == block.Block.Size) + { + block = ReserveNewSparseBlock().Block; + } + } + + page = new AddressTablePage(true, block.Block.Pointer + (IntPtr)_sparseReservedOffset); + + _sparseReservedOffset += (ulong)size; + + _sparseLock.ExitWriteLock(); + } + else + { + var address = (IntPtr)NativeAllocator.Instance.Allocate((uint)size); + page = new AddressTablePage(false, address); + + var span = new Span((void*)page.Address, length); + span.Fill(fill); + } + + _pages.Add(page); + + //TranslatorEventSource.Log.AddressTableAllocated(size, leaf); + + return page.Address; + } + + /// + /// Releases all resources used by the instance. + /// + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// + /// Releases all unmanaged and optionally managed resources used by the + /// instance. + /// + /// to dispose managed resources also; otherwise just unmanaged resouces + protected virtual void Dispose(bool disposing) + { + if (!_disposed) + { + foreach (var page in _pages) + { + if (!page.IsSparse) + { + Marshal.FreeHGlobal(page.Address); + } + } + + if (Sparse) + { + foreach (TableSparseBlock block in _sparseReserved) + { + block.Dispose(); + } + + _sparseReserved.Clear(); + + _fillBottomLevel.Dispose(); + _sparseFill.Dispose(); + _sparseLock.Dispose(); + } + + _disposed = true; + } + } + + /// + /// Frees resources used by the instance. 
+ /// + ~AddressTable() + { + Dispose(false); + } + } +} diff --git a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs index 9893c59b2..bd512a758 100644 --- a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs +++ b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Memory; using ARMeilleure.Translation; using Ryujinx.Cpu.Signal; @@ -9,11 +10,14 @@ namespace Ryujinx.Cpu.Jit { private readonly ITickSource _tickSource; private readonly Translator _translator; + private readonly AddressTable _functionTable; public JitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit) { _tickSource = tickSource; - _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit); + _functionTable = AddressTable.CreateForArm(for64Bit, memory.Type); + + _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, _functionTable); if (memory.Type.IsHostMappedOrTracked()) { @@ -55,6 +59,7 @@ namespace Ryujinx.Cpu.Jit /// public void PrepareCodeRange(ulong address, ulong size) { + _functionTable.SignalCodeRange(address, size); _translator.PrepareCodeRange(address, size); } diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs index 3b1ff5a2a..b2192f3d4 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs @@ -140,6 +140,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 bool isTail = false) { int tempRegister; + int tempGuestAddress = -1; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && funcTable != null && funcTable.Sparse && funcTable.Levels.Length == 2; if (guestAddress.Kind == OperandKind.Constant) { @@ -153,9 +156,16 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup && guestAddress.Value == 0) + { + // X0 will be overwritten. Move the address to a temp register. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } - tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1; + tempRegister = NextFreeRegister(1, tempGuestAddress); if (!isTail) { @@ -176,6 +186,45 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + + Operand indexReg = Register(NextFreeRegister(tempRegister + 1, tempGuestAddress)); + + if (tempGuestAddress != -1) + { + guestAddress = Register(tempGuestAddress); + } + + var level0 = funcTable.Levels[0]; + asm.Ubfx(indexReg, guestAddress, level0.Index, level0.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table. + asm.Mov(rn, tableBase); + asm.Add(rn, rn, indexReg); + + // Load the page address. + asm.LdrRiUn(rn, rn, 0); + + var level1 = funcTable.Levels[1]; + asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Index into the page. 
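+                // (After this add, rn points at the 8-byte entry for the guest address;
+                // untranslated targets resolve to the slow dispatch stub, which the
+                // table is pre-filled with.)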
+ asm.Add(rn, rn, indexReg); + + // Load the final branch address + asm.LdrRiUn(rn, rn, 0); + + if (tempGuestAddress != -1) + { + regAlloc.FreeTempGprRegister(tempGuestAddress); + } + } else { asm.Mov(rn, (ulong)funcPtr); @@ -252,5 +301,20 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } + + private static int NextFreeRegister(int start, int avoid) + { + if (start == avoid) + { + start++; + } + + return start; + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs index 82cb29d73..920f61c10 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs @@ -305,6 +305,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 bool isTail = false) { int tempRegister; + int tempGuestAddress = -1; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && funcTable != null && funcTable.Sparse && funcTable.Levels.Length == 2; if (guestAddress.Kind == OperandKind.Constant) { @@ -318,9 +321,16 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup && guestAddress.Value == 0) + { + // X0 will be overwritten. Move the address to a temp register. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } - tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1; + tempRegister = NextFreeRegister(1, tempGuestAddress); if (!isTail) { @@ -341,6 +351,45 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only enabled when the sparse function table is enabled with 2 levels. + + Operand indexReg = Register(NextFreeRegister(tempRegister + 1, tempGuestAddress)); + + if (tempGuestAddress != -1) + { + guestAddress = Register(tempGuestAddress); + } + + var level0 = funcTable.Levels[0]; + asm.Ubfx(indexReg, guestAddress, level0.Index, level0.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table. + asm.Mov(rn, tableBase); + asm.Add(rn, rn, indexReg); + + // Load the page address. + asm.LdrRiUn(rn, rn, 0); + + var level1 = funcTable.Levels[1]; + asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Index into the page. 
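+                // (Same sequence as the Arm32 emitter above: the add produces the
+                // entry's address, and the final load yields either a translated
+                // function or the slow dispatch stub fill value.)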
+ asm.Add(rn, rn, indexReg); + + // Load the final branch address + asm.LdrRiUn(rn, rn, 0); + + if (tempGuestAddress != -1) + { + regAlloc.FreeTempGprRegister(tempGuestAddress); + } + } else { asm.Mov(rn, (ulong)funcPtr); @@ -613,5 +662,20 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } + + private static int NextFreeRegister(int start, int avoid) + { + if (start == avoid) + { + start++; + } + + return start; + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs index b63636e39..c39e0e67e 100644 --- a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs +++ b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Memory; using Ryujinx.Cpu.Jit; using Ryujinx.Cpu.LightningJit.State; @@ -8,11 +9,15 @@ namespace Ryujinx.Cpu.LightningJit { private readonly ITickSource _tickSource; private readonly Translator _translator; + private readonly AddressTable _functionTable; public LightningJitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit) { _tickSource = tickSource; - _translator = new Translator(memory, for64Bit); + _functionTable = AddressTable.CreateForArm(for64Bit, memory.Type); + + _translator = new Translator(memory, _functionTable); + memory.UnmapEvent += UnmapHandler; } @@ -48,6 +53,7 @@ namespace Ryujinx.Cpu.LightningJit /// public void PrepareCodeRange(ulong address, ulong size) { + _functionTable.SignalCodeRange(address, size); } public void Dispose() diff --git a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs index d62410253..bb12ac5fd 100644 --- a/src/Ryujinx.Cpu/LightningJit/Translator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs @@ -19,25 +19,6 @@ namespace Ryujinx.Cpu.LightningJit // Should be enabled on platforms that enforce W^X. private static bool IsNoWxPlatform => false; - private static readonly AddressTable.Level[] _levels64Bit = - new AddressTable.Level[] - { - new(31, 17), - new(23, 8), - new(15, 8), - new( 7, 8), - new( 2, 5), - }; - - private static readonly AddressTable.Level[] _levels32Bit = - new AddressTable.Level[] - { - new(23, 9), - new(15, 8), - new( 7, 8), - new( 1, 6), - }; - private readonly ConcurrentQueue> _oldFuncs; private readonly NoWxCache _noWxCache; private bool _disposed; @@ -47,7 +28,7 @@ namespace Ryujinx.Cpu.LightningJit internal TranslatorStubs Stubs { get; } internal IMemoryManager Memory { get; } - public Translator(IMemoryManager memory, bool for64Bits) + public Translator(IMemoryManager memory, AddressTable functionTable) { Memory = memory; @@ -63,7 +44,7 @@ namespace Ryujinx.Cpu.LightningJit } Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? 
_levels64Bit : _levels32Bit); + FunctionTable = functionTable; Stubs = new TranslatorStubs(FunctionTable, _noWxCache); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; diff --git a/src/Ryujinx.Memory/SparseMemoryBlock.cs b/src/Ryujinx.Memory/SparseMemoryBlock.cs new file mode 100644 index 000000000..523685de1 --- /dev/null +++ b/src/Ryujinx.Memory/SparseMemoryBlock.cs @@ -0,0 +1,125 @@ +using Ryujinx.Common; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; + +namespace Ryujinx.Memory +{ + public delegate void PageInitDelegate(Span page); + + public class SparseMemoryBlock : IDisposable + { + private const ulong MapGranularity = 1UL << 17; + + private readonly PageInitDelegate _pageInit; + + private readonly object _lock = new object(); + private readonly ulong _pageSize; + private readonly MemoryBlock _reservedBlock; + private readonly List _mappedBlocks; + private ulong _mappedBlockUsage; + + private readonly ulong[] _mappedPageBitmap; + + public MemoryBlock Block => _reservedBlock; + + public SparseMemoryBlock(ulong size, PageInitDelegate pageInit, MemoryBlock fill) + { + _pageSize = MemoryBlock.GetPageSize(); + _reservedBlock = new MemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.ViewCompatible); + _mappedBlocks = new List(); + _pageInit = pageInit; + + int pages = (int)BitUtils.DivRoundUp(size, _pageSize); + int bitmapEntries = BitUtils.DivRoundUp(pages, 64); + _mappedPageBitmap = new ulong[bitmapEntries]; + + if (fill != null) + { + // Fill the block with mappings from the fill block. + + if (fill.Size % _pageSize != 0) + { + throw new ArgumentException("Fill memory block should be page aligned.", nameof(fill)); + } + + int repeats = (int)BitUtils.DivRoundUp(size, fill.Size); + + ulong offset = 0; + for (int i = 0; i < repeats; i++) + { + _reservedBlock.MapView(fill, 0, offset, Math.Min(fill.Size, size - offset)); + offset += fill.Size; + } + } + + // If a fill block isn't provided, the pages that aren't EnsureMapped are unmapped. + // The caller can rely on signal handler to fill empty pages instead. + } + + private void MapPage(ulong pageOffset) + { + // Take a page from the latest mapped block. + MemoryBlock block = _mappedBlocks.LastOrDefault(); + + if (block == null || _mappedBlockUsage == MapGranularity) + { + // Need to map some more memory. + + block = new MemoryBlock(MapGranularity, MemoryAllocationFlags.Mirrorable); + + _mappedBlocks.Add(block); + + _mappedBlockUsage = 0; + } + + _pageInit(block.GetSpan(_mappedBlockUsage, (int)_pageSize)); + _reservedBlock.MapView(block, _mappedBlockUsage, pageOffset, _pageSize); + + _mappedBlockUsage += _pageSize; + } + + public void EnsureMapped(ulong offset) + { + int pageIndex = (int)(offset / _pageSize); + int bitmapIndex = pageIndex >> 6; + + ref ulong entry = ref _mappedPageBitmap[bitmapIndex]; + ulong bit = 1UL << (pageIndex & 63); + + if ((Volatile.Read(ref entry) & bit) == 0) + { + // Not mapped. + + lock (_lock) + { + // Check the bit while locked to make sure that this only happens once. 
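+                    // (Double-checked locking: the lock-free Volatile.Read above filters
+                    // pages that are already mapped, and the bit is only published via
+                    // Interlocked.Exchange after MapPage has completed.)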
+
+                    ulong lockedEntry = Volatile.Read(ref entry);
+
+                    if ((lockedEntry & bit) == 0)
+                    {
+                        MapPage(offset & ~(_pageSize - 1));
+
+                        lockedEntry |= bit;
+
+                        Interlocked.Exchange(ref entry, lockedEntry);
+                    }
+                }
+            }
+        }
+
+        public void Dispose()
+        {
+            _reservedBlock.Dispose();
+
+            foreach (MemoryBlock block in _mappedBlocks)
+            {
+                block.Dispose();
+            }
+
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/src/Ryujinx.Tests/Cpu/CpuContext.cs b/src/Ryujinx.Tests/Cpu/CpuContext.cs
index 96b4965a2..81e8ba8c9 100644
--- a/src/Ryujinx.Tests/Cpu/CpuContext.cs
+++ b/src/Ryujinx.Tests/Cpu/CpuContext.cs
@@ -1,3 +1,4 @@
+using ARMeilleure.Common;
 using ARMeilleure.Memory;
 using ARMeilleure.State;
 using ARMeilleure.Translation;
@@ -12,7 +13,7 @@ namespace Ryujinx.Tests.Cpu
 
         public CpuContext(IMemoryManager memory, bool for64Bit)
         {
-            _translator = new Translator(new JitMemoryAllocator(), memory, for64Bit);
+            _translator = new Translator(new JitMemoryAllocator(), memory, AddressTable<ulong>.CreateForArm(for64Bit, memory.Type));
 
             memory.UnmapEvent += UnmapHandler;
         }
diff --git a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs
index 2a4775a31..2a8a98179 100644
--- a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs
+++ b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs
@@ -1,3 +1,4 @@
+using ARMeilleure.Common;
 using ARMeilleure.Translation;
 using NUnit.Framework;
 using Ryujinx.Cpu.Jit;
@@ -17,7 +18,10 @@ namespace Ryujinx.Tests.Cpu
         private static void EnsureTranslator()
         {
             // Create a translator, as one is needed to register the signal handler or emit methods.
-            _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true);
+            _translator ??= new Translator(
+                new JitMemoryAllocator(),
+                new MockMemoryManager(),
+                AddressTable<ulong>.CreateForArm(true, ARMeilleure.Memory.MemoryManagerType.SoftwarePageTable));
         }
 
         [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
diff --git a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs
index ace68e5c2..6cdfb662c 100644
--- a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs
+++ b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs
@@ -1,3 +1,4 @@
+using ARMeilleure.Common;
 using ARMeilleure.Signal;
 using ARMeilleure.Translation;
 using NUnit.Framework;
@@ -53,7 +54,10 @@ namespace Ryujinx.Tests.Memory
         private static void EnsureTranslator()
        {
             // Create a translator, as one is needed to register the signal handler or emit methods.
-            _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true);
+            _translator ??= new Translator(
+                new JitMemoryAllocator(),
+                new MockMemoryManager(),
+                AddressTable<ulong>.CreateForArm(true, ARMeilleure.Memory.MemoryManagerType.SoftwarePageTable));
         }
 
         [Test]
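Note (review annotation, not part of the patch): a minimal end-to-end sketch of the new surface from the host side. The addresses, entry values, and memory manager type below are illustrative only, and sparse mode assumes the native signal handler is installed, which the JIT contexts above only guarantee for host mapped or tracked memory.

```csharp
using ARMeilleure.Common;
using ARMeilleure.Memory;

class Example
{
    static void Main()
    {
        // Pick the preset for the guest ISA; sparse mode is chosen
        // automatically based on the memory manager type.
        AddressTable<ulong> table = AddressTable<ulong>.CreateForArm(
            for64Bits: true, MemoryManagerType.HostMappedUnsafe);

        // Unset entries resolve to this value; the JIT points it at its
        // slow dispatch stub. (Placeholder value here.)
        table.Fill = 0xDEAD_BEEF;

        // Hint the expected code footprint so sparse blocks are reserved
        // at a useful size before leaf pages are allocated.
        table.SignalCodeRange(0x0008_0000, 0x0010_0000);

        // Reading or writing an entry commits its backing page on demand.
        ref ulong entry = ref table.GetValue(0x0008_1230);
        entry = 0x7F00_1000; // host address of a translated function (made up)

        table.Dispose();
    }
}
```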