diff --git a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs index 3b1ff5a2a..9d7581580 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm32/Target/Arm64/InstEmitFlow.cs @@ -140,6 +140,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 bool isTail = false) { int tempRegister; + int tempGuestAddress = 0; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && funcTable != null && funcTable.Levels.Length == 2; if (guestAddress.Kind == OperandKind.Constant) { @@ -153,6 +156,13 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup) + { + // The guest address register might be overwritten. Copy the address to a temp register for the inline lookup to read. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } tempRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1; @@ -176,6 +186,47 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only used when the guest address is not a constant and the function table has exactly 2 levels. NOTE(review): indexReg is hard-coded to register 3; confirm it can never alias the temp guest address register or the context register. + + Operand indexReg = Register(3); + guestAddress = Register(tempGuestAddress); + + var level0 = funcTable.Levels[0]; + asm.Ubfx(indexReg, guestAddress, level0.Index, level0.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table: rn = table base address offset by the level 0 index scaled by 8 (the shift left by 3). + asm.Mov(rn, tableBase); + asm.Add(rn, rn, indexReg); + + // Load the page address stored in that table entry. + asm.LdrRiUn(rn, rn, 0); + + var level1 = funcTable.Levels[1]; + asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Is the page address zero? 
Make sure to use the fallback if it is. + asm.Tst(rn, rn); + + // Index into the page: offset the page address by the level 1 index scaled by 8. + asm.Add(rn, rn, indexReg); + + // Reuse the index register to hold the fallback address (the page offset is already added to rn); the Csel below picks it on the Eq condition, i.e. when the page address tested as zero. + ulong fallback = (ulong)funcTable.Fallback; + asm.Mov(indexReg, fallback); + + asm.Csel(rn, indexReg, rn, ArmCondition.Eq); + + // Load the final branch address from the selected entry (page entry or fallback). + asm.LdrRiUn(rn, rn, 0); + + regAlloc.FreeTempGprRegister(tempGuestAddress); + } else { asm.Mov(rn, (ulong)funcPtr); @@ -252,5 +303,10 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs index 82cb29d73..342f5c698 100644 --- a/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs +++ b/src/Ryujinx.Cpu/LightningJit/Arm64/Target/Arm64/InstEmitSystem.cs @@ -305,6 +305,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 bool isTail = false) { int tempRegister; + int tempGuestAddress = 0; + + bool inlineLookup = guestAddress.Kind != OperandKind.Constant && funcTable != null && funcTable.Levels.Length == 2; if (guestAddress.Kind == OperandKind.Constant) { @@ -318,6 +321,13 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 else { asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset); + + if (inlineLookup) + { + // The guest address register might be overwritten. Copy the address to a temp register for the inline lookup to read. + tempGuestAddress = regAlloc.AllocateTempGprRegister(); + asm.Mov(Register(tempGuestAddress), guestAddress); + } } tempRegister = regAlloc.FixedContextRegister == 1 ? 
2 : 1; @@ -341,6 +351,47 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 asm.Mov(rn, funcPtrLoc & ~0xfffUL); asm.LdrRiUn(rn, rn, (int)(funcPtrLoc & 0xfffUL)); } + else if (inlineLookup) + { + // Inline table lookup. Only used when the guest address is not a constant and the function table has exactly 2 levels. NOTE(review): indexReg is hard-coded to register 3; confirm it can never alias the temp guest address register or the context register. + + Operand indexReg = Register(3); + guestAddress = Register(tempGuestAddress); + + var level0 = funcTable.Levels[0]; + asm.Ubfx(indexReg, guestAddress, level0.Index, level0.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + ulong tableBase = (ulong)funcTable.Base; + + // Index into the table: rn = table base address offset by the level 0 index scaled by 8 (the shift left by 3). + asm.Mov(rn, tableBase); + asm.Add(rn, rn, indexReg); + + // Load the page address stored in that table entry. + asm.LdrRiUn(rn, rn, 0); + + var level1 = funcTable.Levels[1]; + asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length); + asm.Lsl(indexReg, indexReg, Const(3)); + + // Is the page address zero? Make sure to use the fallback if it is. + asm.Tst(rn, rn); + + // Index into the page: offset the page address by the level 1 index scaled by 8. + asm.Add(rn, rn, indexReg); + + // Reuse the index register to hold the fallback address (the page offset is already added to rn); the Csel below picks it on the Eq condition, i.e. when the page address tested as zero. + ulong fallback = (ulong)funcTable.Fallback; + asm.Mov(indexReg, fallback); + + asm.Csel(rn, indexReg, rn, ArmCondition.Eq); + + // Load the final branch address from the selected entry (page entry or fallback). + asm.LdrRiUn(rn, rn, 0); + + regAlloc.FreeTempGprRegister(tempGuestAddress); + } else { asm.Mov(rn, (ulong)funcPtr); @@ -613,5 +664,10 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64 { return new Operand(register, RegisterType.Integer, type); } + + private static Operand Const(long value, OperandType type = OperandType.I64) + { + return new Operand(type, (ulong)value); + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs index d62410253..8b1b875f4 100644 --- a/src/Ryujinx.Cpu/LightningJit/Translator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs @@ -16,6 +16,8 @@ namespace Ryujinx.Cpu.LightningJit { class Translator : IDisposable { + private const bool UseSparseTable = true; + // Should be enabled on 
platforms that enforce W^X. private static bool IsNoWxPlatform => false; @@ -38,6 +40,20 @@ namespace Ryujinx.Cpu.LightningJit new( 1, 6), }; + private static readonly AddressTable.Level[] _levels64BitSparse = + new AddressTable.Level[] + { + new(23, 16), + new( 2, 21), + }; + + private static readonly AddressTable.Level[] _levels32BitSparse = + new AddressTable.Level[] + { + new(22, 10), + new( 1, 21), + }; + private readonly ConcurrentQueue> _oldFuncs; private readonly NoWxCache _noWxCache; private bool _disposed; @@ -62,8 +78,19 @@ namespace Ryujinx.Cpu.LightningJit JitCache.Initialize(new JitMemoryAllocator(forJit: true)); } + AddressTable.Level[] levels; + + if (UseSparseTable) + { + levels = for64Bits ? _levels64BitSparse : _levels32BitSparse; + } + else + { + levels = for64Bits ? _levels64Bit : _levels32Bit; + } + Functions = new TranslatorCache(); - FunctionTable = new AddressTable(for64Bits ? _levels64Bit : _levels32Bit); + FunctionTable = new AddressTable(levels); Stubs = new TranslatorStubs(FunctionTable, _noWxCache); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;