JitArm64: Implement "soft MMU"
This is used when fastmem isn't available. Instead of always falling back to the C++ code in MMU.cpp, the JIT translates addresses on its own by looking them up in a table that Dolphin constructs. This is slower than fastmem, but faster than the old non-fastmem code.

This is primarily useful for iOS, since that's the only major platform nowadays where you can't reliably get fastmem. I think it would still make sense to merge this feature into master, since there's nothing actually iOS-specific about it. For instance, it would be of use to me when I have to disable fastmem to stop Android Studio from constantly breaking on segfaults.

Co-authored-by: OatmealDome <julian@oatmealdome.me>
This commit is contained in:
parent
bcc64a05b3
commit
3dce1df00e
|
@ -47,6 +47,8 @@ namespace Memory
|
|||
// Store the MemArena here
|
||||
u8* physical_base = nullptr;
|
||||
u8* logical_base = nullptr;
|
||||
u8* physical_page_mappings_base = nullptr;
|
||||
u8* logical_page_mappings_base = nullptr;
|
||||
static bool is_fastmem_arena_initialized = false;
|
||||
|
||||
// The MemArena class
|
||||
|
@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
|
|||
|
||||
static std::vector<LogicalMemoryView> logical_mapped_entries;
|
||||
|
||||
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
|
||||
static std::array<void*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
|
||||
|
||||
void Init()
|
||||
{
|
||||
const auto get_mem1_size = [] {
|
||||
|
@ -280,6 +285,8 @@ void Init()
|
|||
}
|
||||
g_arena.GrabSHMSegment(mem_size);
|
||||
|
||||
s_physical_page_mappings.fill(nullptr);
|
||||
|
||||
// Create an anonymous view of the physical memory
|
||||
for (const PhysicalMemoryRegion& region : s_physical_regions)
|
||||
{
|
||||
|
@ -295,8 +302,17 @@ void Init()
|
|||
region.physical_address, region.size);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
|
||||
{
|
||||
const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
|
||||
s_physical_page_mappings[index] = *region.out_pointer + i;
|
||||
}
|
||||
}
|
||||
|
||||
physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
|
||||
logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
|
||||
|
||||
InitMMIO(wii);
|
||||
|
||||
Clear();
|
||||
|
@ -347,14 +363,14 @@ bool InitFastmemArena()
|
|||
|
||||
void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
||||
{
|
||||
if (!is_fastmem_arena_initialized)
|
||||
return;
|
||||
|
||||
for (auto& entry : logical_mapped_entries)
|
||||
{
|
||||
g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
|
||||
}
|
||||
logical_mapped_entries.clear();
|
||||
|
||||
s_logical_page_mappings.fill(nullptr);
|
||||
|
||||
for (u32 i = 0; i < dbat_table.size(); ++i)
|
||||
{
|
||||
if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
|
||||
|
@ -375,19 +391,27 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
|
|||
if (intersection_start < intersection_end)
|
||||
{
|
||||
// Found an overlapping region; map it.
|
||||
u32 position = physical_region.shm_position + intersection_start - mapping_address;
|
||||
u8* base = logical_base + logical_address + intersection_start - translated_address;
|
||||
u32 mapped_size = intersection_end - intersection_start;
|
||||
|
||||
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
|
||||
if (!mapped_pointer)
|
||||
if (is_fastmem_arena_initialized)
|
||||
{
|
||||
PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
|
||||
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
|
||||
intersection_start, mapped_size, logical_address);
|
||||
exit(0);
|
||||
u32 position = physical_region.shm_position + intersection_start - mapping_address;
|
||||
u8* base = logical_base + logical_address + intersection_start - translated_address;
|
||||
u32 mapped_size = intersection_end - intersection_start;
|
||||
|
||||
void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
|
||||
if (!mapped_pointer)
|
||||
{
|
||||
PanicAlertFmt(
|
||||
"Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
|
||||
"(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
|
||||
intersection_start, mapped_size, logical_address);
|
||||
exit(0);
|
||||
}
|
||||
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
|
||||
}
|
||||
logical_mapped_entries.push_back({mapped_pointer, mapped_size});
|
||||
|
||||
s_logical_page_mappings[i] =
|
||||
*physical_region.out_pointer + intersection_start - mapping_address;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,11 @@ namespace Memory
|
|||
extern u8* physical_base;
|
||||
extern u8* logical_base;
|
||||
|
||||
// This page table is used for a "soft MMU" implementation when
|
||||
// setting up the full memory map in process memory isn't possible.
|
||||
extern u8* physical_page_mappings_base;
|
||||
extern u8* logical_page_mappings_base;
|
||||
|
||||
// The actual memory used for backing the memory map.
|
||||
extern u8* m_pRAM;
|
||||
extern u8* m_pEXRAM;
|
||||
|
|
|
@ -216,9 +216,6 @@ protected:
|
|||
void DumpCode(const u8* start, const u8* end);
|
||||
|
||||
// This enum is used for selecting an implementation of EmitBackpatchRoutine.
|
||||
//
|
||||
// The below descriptions of each enum entry apply when jo.fastmem_arena is true.
|
||||
// If jo.fastmem_arena is false, the slow C++ code is always called instead.
|
||||
enum class MemAccessMode
|
||||
{
|
||||
// Always calls the slow C++ code. For performance reasons, should generally only be used if
|
||||
|
@ -227,8 +224,10 @@ protected:
|
|||
// Only emits fast access code. Must only be used if the guest address is known in advance
|
||||
// and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
|
||||
AlwaysUnsafe,
|
||||
// Best in most cases. Tries to run fast access code, and if that fails, uses backpatching to
|
||||
// replace the code with a call to the slow C++ code.
|
||||
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
|
||||
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
|
||||
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
|
||||
// then branches to either the fast access code or the slow C++ code.
|
||||
Auto,
|
||||
};
|
||||
|
||||
|
@ -249,11 +248,12 @@ protected:
|
|||
//
|
||||
// Additional scratch registers are used in the following situations:
|
||||
//
|
||||
// mode == Auto && emitting_routine: X2
|
||||
// mode == Auto && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
|
||||
// mode == Auto && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||
// emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
|
||||
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
|
||||
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
|
||||
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
|
||||
//
|
||||
// mode != AlwaysUnsafe || !jo.fastmem_arena:
|
||||
// mode != AlwaysUnsafe:
|
||||
// X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
|
||||
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
|
||||
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
|
||||
|
|
|
@ -60,8 +60,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
{
|
||||
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
|
||||
|
||||
const bool emit_fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
|
||||
const bool emit_slowmem = !jo.fastmem_arena || mode != MemAccessMode::AlwaysUnsafe;
|
||||
const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
|
||||
const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
|
||||
|
||||
bool in_far_code = false;
|
||||
const u8* fastmem_start = GetCodePtr();
|
||||
|
@ -69,7 +69,29 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
|
||||
if (emit_fastmem)
|
||||
{
|
||||
if (emit_slowmem && emitting_routine)
|
||||
ARM64Reg memory_base = MEM_REG;
|
||||
ARM64Reg memory_offset = addr;
|
||||
|
||||
if (!jo.fastmem_arena)
|
||||
{
|
||||
const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
|
||||
|
||||
memory_base = EncodeRegTo64(temp);
|
||||
memory_offset = ARM64Reg::W8; // TODO
|
||||
|
||||
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
|
||||
LDR(memory_base, MEM_REG, ArithOption(temp, true));
|
||||
|
||||
if (emit_slowmem)
|
||||
{
|
||||
FixupBranch pass = CBNZ(memory_base);
|
||||
slowmem_fixup = B();
|
||||
SetJumpTarget(pass);
|
||||
}
|
||||
|
||||
AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 64));
|
||||
}
|
||||
else if (emit_slowmem && emitting_routine)
|
||||
{
|
||||
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
|
||||
const ARM64Reg temp2 = ARM64Reg::W2;
|
||||
|
@ -82,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
ARM64Reg temp = ARM64Reg::D0;
|
||||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
|
||||
|
||||
m_float_emit.STR(access_size, temp, MEM_REG, addr);
|
||||
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
|
||||
}
|
||||
else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
|
||||
{
|
||||
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
|
||||
m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);
|
||||
|
||||
ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
|
||||
true, false);
|
||||
|
@ -97,28 +119,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
STR(temp, MEM_REG, addr);
|
||||
STR(temp, memory_base, memory_offset);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
STRH(temp, MEM_REG, addr);
|
||||
STRH(temp, memory_base, memory_offset);
|
||||
else
|
||||
STRB(temp, MEM_REG, addr);
|
||||
STRB(temp, memory_base, memory_offset);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||
{
|
||||
// This only stores 32 bytes of zeros to the target address
|
||||
ARM64Reg temp = ARM64Reg::X30;
|
||||
ADD(temp, addr, MEM_REG);
|
||||
ADD(temp, memory_base, memory_offset);
|
||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
|
||||
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
LDR(RS, MEM_REG, addr);
|
||||
LDR(RS, memory_base, memory_offset);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
LDRH(RS, MEM_REG, addr);
|
||||
LDRH(RS, memory_base, memory_offset);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||
LDRB(RS, MEM_REG, addr);
|
||||
LDRB(RS, memory_base, memory_offset);
|
||||
|
||||
ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
|
||||
}
|
||||
|
@ -134,7 +156,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
in_far_code = true;
|
||||
SwitchToFarCode();
|
||||
|
||||
if (!emitting_routine)
|
||||
if (jo.fastmem_arena && !emitting_routine)
|
||||
{
|
||||
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
|
||||
fastmem_area->fastmem_code = fastmem_start;
|
||||
|
@ -264,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
|
|||
|
||||
if (in_far_code)
|
||||
{
|
||||
if (emitting_routine)
|
||||
if (slowmem_fixup)
|
||||
{
|
||||
FixupBranch done = B();
|
||||
SwitchToNearCode();
|
||||
|
|
|
@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
|
|||
ARM64Reg::W11,
|
||||
ARM64Reg::W10,
|
||||
ARM64Reg::W9,
|
||||
ARM64Reg::W8,
|
||||
// ARM64Reg::W8,
|
||||
ARM64Reg::W7,
|
||||
ARM64Reg::W6,
|
||||
ARM64Reg::W5,
|
||||
|
|
|
@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
|
|||
// set the mem_base based on MSR flags
|
||||
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||
MOVP2R(MEM_REG, Memory::physical_base);
|
||||
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
|
||||
FixupBranch membaseend = B();
|
||||
SetJumpTarget(physmem);
|
||||
MOVP2R(MEM_REG, Memory::logical_base);
|
||||
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
|
||||
SetJumpTarget(membaseend);
|
||||
|
||||
// iCache[(address >> 2) & iCache_Mask];
|
||||
|
@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
|
|||
// set the mem_base based on MSR flags and jump to next block.
|
||||
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||
MOVP2R(MEM_REG, Memory::physical_base);
|
||||
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
|
||||
BR(ARM64Reg::X0);
|
||||
SetJumpTarget(physmem);
|
||||
MOVP2R(MEM_REG, Memory::logical_base);
|
||||
MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
|
||||
BR(ARM64Reg::X0);
|
||||
|
||||
// Call JIT
|
||||
|
|
|
@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
|
|||
|
||||
constexpr int BAT_INDEX_SHIFT = 17;
|
||||
constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
|
||||
constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
|
||||
constexpr u32 BAT_MAPPED_BIT = 0x1;
|
||||
constexpr u32 BAT_PHYSICAL_BIT = 0x2;
|
||||
constexpr u32 BAT_WI_BIT = 0x4;
|
||||
constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
|
||||
using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>; // 128 KB
|
||||
using BatTable = std::array<u32, BAT_PAGE_COUNT>; // 128 KB
|
||||
extern BatTable ibat_table;
|
||||
extern BatTable dbat_table;
|
||||
inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)
|
||||
|
|
Loading…
Reference in New Issue