JitArm64: Implement "soft MMU"

This is used when fastmem isn't available. Instead of always falling
back to the C++ code in MMU.cpp, the JIT translates addresses on its
own by looking them up in a table that Dolphin constructs. This is
slower than fastmem, but faster than the old non-fastmem code.

This is primarily useful for iOS, since that's the only major platform
nowadays where you can't reliably get fastmem. Even so, I think it makes
sense to merge this feature into master, since nothing about it is
actually iOS-specific. For instance, it would be useful to me when I
have to disable fastmem to stop Android Studio from constantly breaking
on segfaults.

Co-authored-by: OatmealDome <julian@oatmealdome.me>
This commit is contained in:
JosJuice 2022-06-11 15:49:08 +02:00
parent bcc64a05b3
commit 3dce1df00e
7 changed files with 94 additions and 42 deletions

View File

@ -47,6 +47,8 @@ namespace Memory
// Store the MemArena here
u8* physical_base = nullptr;
u8* logical_base = nullptr;
u8* physical_page_mappings_base = nullptr;
u8* logical_page_mappings_base = nullptr;
static bool is_fastmem_arena_initialized = false;
// The MemArena class
@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
static std::vector<LogicalMemoryView> logical_mapped_entries;
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
void Init()
{
const auto get_mem1_size = [] {
@ -280,6 +285,8 @@ void Init()
}
g_arena.GrabSHMSegment(mem_size);
s_physical_page_mappings.fill(nullptr);
// Create an anonymous view of the physical memory
for (const PhysicalMemoryRegion& region : s_physical_regions)
{
@ -295,8 +302,17 @@ void Init()
region.physical_address, region.size);
exit(0);
}
for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
{
const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
s_physical_page_mappings[index] = *region.out_pointer + i;
}
}
physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
InitMMIO(wii);
Clear();
@ -347,14 +363,14 @@ bool InitFastmemArena()
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
{
if (!is_fastmem_arena_initialized)
return;
for (auto& entry : logical_mapped_entries)
{
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
}
logical_mapped_entries.clear();
s_logical_page_mappings.fill(nullptr);
for (u32 i = 0; i < dbat_table.size(); ++i)
{
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
@ -375,19 +391,27 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
if (intersection_start < intersection_end)
{
// Found an overlapping region; map it.
u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start;
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
if (!mapped_pointer)
if (is_fastmem_arena_initialized)
{
PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
intersection_start, mapped_size, logical_address);
exit(0);
u32 position = physical_region.shm_position + intersection_start - mapping_address;
u8* base = logical_base + logical_address + intersection_start - translated_address;
u32 mapped_size = intersection_end - intersection_start;
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
if (!mapped_pointer)
{
PanicAlertFmt(
"Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
intersection_start, mapped_size, logical_address);
exit(0);
}
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
}
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
s_logical_page_mappings[i] =
*physical_region.out_pointer + intersection_start - mapping_address;
}
}
}

View File

@ -27,6 +27,11 @@ namespace Memory
extern u8* physical_base;
extern u8* logical_base;
// This page table is used for a "soft MMU" implementation when
// setting up the full memory map in process memory isn't possible.
extern u8* physical_page_mappings_base;
extern u8* logical_page_mappings_base;
// The actual memory used for backing the memory map.
extern u8* m_pRAM;
extern u8* m_pEXRAM;

View File

@ -216,9 +216,6 @@ protected:
void DumpCode(const u8* start, const u8* end);
// This enum is used for selecting an implementation of EmitBackpatchRoutine.
//
// The below descriptions of each enum entry apply when jo.fastmem_arena is true.
// If jo.fastmem_arena is false, the slow C++ code is always called instead.
enum class MemAccessMode
{
// Always calls the slow C++ code. For performance reasons, should generally only be used if
@ -227,8 +224,10 @@ protected:
// Only emits fast access code. Must only be used if the guest address is known in advance
// and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
AlwaysUnsafe,
// Best in most cases. Tries to run fast access code, and if that fails, uses backpatching to
// replace the code with a call to the slow C++ code.
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
// then branches to either the fast access code or the slow C++ code.
Auto,
};
@ -249,11 +248,12 @@ protected:
//
// Additional scratch registers are used in the following situations:
//
// mode == Auto && emitting_routine: X2
// mode == Auto && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
// mode == Auto && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
//
// mode != AlwaysUnsafe || !jo.fastmem_arena:
// mode != AlwaysUnsafe:
// X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),

View File

@ -60,8 +60,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
const bool emit_fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = !jo.fastmem_arena || mode != MemAccessMode::AlwaysUnsafe;
const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
bool in_far_code = false;
const u8* fastmem_start = GetCodePtr();
@ -69,7 +69,29 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (emit_fastmem)
{
if (emit_slowmem && emitting_routine)
ARM64Reg memory_base = MEM_REG;
ARM64Reg memory_offset = addr;
if (!jo.fastmem_arena)
{
const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W8; // TODO
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));
if (emit_slowmem)
{
FixupBranch pass = CBNZ(memory_base);
slowmem_fixup = B();
SetJumpTarget(pass);
}
AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
}
else if (emit_slowmem && emitting_routine)
{
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W2;
@ -82,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
ARM64Reg temp = ARM64Reg::D0;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
m_float_emit.STR(access_size, temp, MEM_REG, addr);
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
}
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
true, false);
@ -97,28 +119,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(temp, MEM_REG, addr);
STR(temp, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
STRH(temp, MEM_REG, addr);
STRH(temp, memory_base, memory_offset);
else
STRB(temp, MEM_REG, addr);
STRB(temp, memory_base, memory_offset);
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
// This literally only stores 32 bytes of zeros to the target address
ARM64Reg temp = ARM64Reg::X30;
ADD(temp, addr, MEM_REG);
ADD(temp, memory_base, memory_offset);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
LDR(RS, MEM_REG, addr);
LDR(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
LDRH(RS, MEM_REG, addr);
LDRH(RS, memory_base, memory_offset);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(RS, MEM_REG, addr);
LDRB(RS, memory_base, memory_offset);
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
}
@ -134,7 +156,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
in_far_code = true;
SwitchToFarCode();
if (!emitting_routine)
if (jo.fastmem_arena && !emitting_routine)
{
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
fastmem_area->fastmem_code = fastmem_start;
@ -264,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (in_far_code)
{
if (emitting_routine)
if (slowmem_fixup)
{
FixupBranch done = B();
SwitchToNearCode();

View File

@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
ARM64Reg::W11,
ARM64Reg::W10,
ARM64Reg::W9,
ARM64Reg::W8,
// ARM64Reg::W8,
ARM64Reg::W7,
ARM64Reg::W6,
ARM64Reg::W5,

View File

@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
SetJumpTarget(membaseend);
// iCache[(address >> 2) & iCache_Mask];
@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags and jump to next block.
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
BR(ARM64Reg::X0);
SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base);
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
BR(ARM64Reg::X0);
// Call JIT

View File

@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
constexpr int BAT_INDEX_SHIFT = 17;
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
constexpr u32 BAT_MAPPED_BIT = 0x1;
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
constexpr u32 BAT_WI_BIT = 0x4;
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
using BatTable = std::array<u32, BAT_PAGE_COUNT>; // 128 KB
extern BatTable ibat_table;
extern BatTable dbat_table;
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)