WIP fastmem
This commit is contained in:
parent
3dd717aca8
commit
5e45330703
|
@ -3,6 +3,7 @@
|
|||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
Log_SetChannel(Common::PageFaultHandler);
|
||||
|
||||
#if defined(WIN32)
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#pragma once
|
||||
#include "types.h"
|
||||
#include <functional>
|
||||
|
||||
namespace Common::PageFaultHandler {
|
||||
enum class HandlerResult
|
||||
|
@ -9,7 +8,7 @@ enum class HandlerResult
|
|||
ExecuteNextHandler,
|
||||
};
|
||||
|
||||
using Callback = std::function<HandlerResult(void* exception_pc, void* fault_address, bool is_write)>;
|
||||
using Callback = HandlerResult(*)(void* exception_pc, void* fault_address, bool is_write);
|
||||
using Handle = void*;
|
||||
|
||||
bool InstallHandler(void* owner, Callback callback);
|
||||
|
|
214
src/core/bus.cpp
214
src/core/bus.cpp
|
@ -10,6 +10,7 @@
|
|||
#include "cpu_disasm.h"
|
||||
#include "dma.h"
|
||||
#include "gpu.h"
|
||||
#include "host_interface.h"
|
||||
#include "interrupt_controller.h"
|
||||
#include "mdec.h"
|
||||
#include "pad.h"
|
||||
|
@ -22,11 +23,6 @@ Log_SetChannel(Bus);
|
|||
|
||||
namespace Bus {
|
||||
|
||||
enum : TickCount
|
||||
{
|
||||
RAM_READ_TICKS = 4
|
||||
};
|
||||
|
||||
union MEMDELAY
|
||||
{
|
||||
u32 bits;
|
||||
|
@ -74,8 +70,8 @@ union MEMCTRL
|
|||
};
|
||||
|
||||
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
|
||||
u8 g_ram[RAM_SIZE]{}; // 2MB RAM
|
||||
u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
|
||||
u8* g_ram = nullptr; // 2MB RAM
|
||||
u8* g_bios = nullptr; // 512K BIOS ROM
|
||||
|
||||
static std::array<TickCount, 3> m_exp1_access_time = {};
|
||||
static std::array<TickCount, 3> m_exp2_access_time = {};
|
||||
|
@ -90,9 +86,15 @@ static u32 m_ram_size_reg = 0;
|
|||
|
||||
static std::string m_tty_line_buffer;
|
||||
|
||||
static Common::MemoryArena m_memory_arena;
|
||||
static u8* m_fastmem_base = nullptr;
|
||||
static std::vector<Common::MemoryArena::View> m_fastmem_ram_views;
|
||||
|
||||
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
|
||||
static void RecalculateMemoryTimings();
|
||||
|
||||
static void SetCodePageFastmemProtection(u32 page_index, bool writable);
|
||||
|
||||
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
|
||||
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u))
|
||||
#define FIXUP_HALFWORD_READ_OFFSET(offset) ((offset) & ~u32(1))
|
||||
|
@ -108,19 +110,32 @@ ALWAYS_INLINE static void FixupUnalignedWordAccessW32(u32& offset, u32& value)
|
|||
value <<= byte_offset * 8;
|
||||
}
|
||||
|
||||
void Initialize()
|
||||
bool Initialize()
|
||||
{
|
||||
if (!AllocateMemory())
|
||||
{
|
||||
g_host_interface->ReportError("Failed to allocate memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
{
|
||||
//
|
||||
m_fastmem_ram_views.clear();
|
||||
if (g_ram)
|
||||
m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE);
|
||||
if (g_bios)
|
||||
m_memory_arena.ReleaseViewPtr(g_bios, BIOS_SIZE);
|
||||
|
||||
CPU::g_state.fastmem_base = nullptr;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
std::memset(g_ram, 0, sizeof(g_ram));
|
||||
std::memset(g_ram, 0, RAM_SIZE);
|
||||
m_MEMCTRL.exp1_base = 0x1F000000;
|
||||
m_MEMCTRL.exp2_base = 0x1F802000;
|
||||
m_MEMCTRL.exp1_delay_size.bits = 0x0013243F;
|
||||
|
@ -142,8 +157,8 @@ bool DoState(StateWrapper& sw)
|
|||
sw.Do(&m_bios_access_time);
|
||||
sw.Do(&m_cdrom_access_time);
|
||||
sw.Do(&m_spu_access_time);
|
||||
sw.DoBytes(g_ram, sizeof(g_ram));
|
||||
sw.DoBytes(g_bios, sizeof(g_bios));
|
||||
sw.DoBytes(g_ram, RAM_SIZE);
|
||||
sw.DoBytes(g_bios, BIOS_SIZE);
|
||||
sw.DoArray(m_MEMCTRL.regs, countof(m_MEMCTRL.regs));
|
||||
sw.Do(&m_ram_size_reg);
|
||||
sw.Do(&m_tty_line_buffer);
|
||||
|
@ -222,6 +237,181 @@ void RecalculateMemoryTimings()
|
|||
m_spu_access_time[2] + 1);
|
||||
}
|
||||
|
||||
bool AllocateMemory()
|
||||
{
|
||||
if (!m_memory_arena.Create(MEMORY_ARENA_SIZE, true, false))
|
||||
{
|
||||
Log_ErrorPrint("Failed to create memory arena");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the base views.
|
||||
g_ram = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false));
|
||||
g_bios = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, true, false));
|
||||
if (!g_ram || !g_bios)
|
||||
{
|
||||
Log_ErrorPrint("Failed to create base views of memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void UpdateFastmemViews(bool enabled, bool isolate_cache)
|
||||
{
|
||||
m_fastmem_ram_views.clear();
|
||||
if (!enabled)
|
||||
{
|
||||
m_fastmem_base = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false");
|
||||
if (!m_fastmem_base)
|
||||
{
|
||||
m_fastmem_base = static_cast<u8*>(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE));
|
||||
if (!m_fastmem_base)
|
||||
{
|
||||
Log_ErrorPrint("Failed to find base address for fastmem");
|
||||
return;
|
||||
}
|
||||
|
||||
Log_InfoPrintf("Fastmem base: %p", m_fastmem_base);
|
||||
CPU::g_state.fastmem_base = m_fastmem_base;
|
||||
}
|
||||
|
||||
auto MapRAM = [](u32 base_address) {
|
||||
u8* map_address = m_fastmem_base + base_address;
|
||||
auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false, map_address);
|
||||
if (!view)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
// mark all pages with code as non-writable
|
||||
for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++)
|
||||
{
|
||||
if (m_ram_code_bits[i])
|
||||
{
|
||||
u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE);
|
||||
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to write-protect code page at %p");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_fastmem_ram_views.push_back(std::move(view.value()));
|
||||
};
|
||||
auto MapBIOS = [](u32 base_address) {
|
||||
u8* map_address = m_fastmem_base + base_address;
|
||||
auto view = m_memory_arena.CreateView(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, false, false, map_address);
|
||||
if (!view)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map BIOS at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
m_fastmem_ram_views.push_back(std::move(view.value()));
|
||||
};
|
||||
|
||||
if (!isolate_cache)
|
||||
{
|
||||
// KUSEG - cached
|
||||
MapRAM(0x00000000);
|
||||
// MapBIOS(0x1FC00000);
|
||||
|
||||
// KSEG0 - cached
|
||||
MapRAM(0x80000000);
|
||||
// MapBIOS(0x9FC00000);
|
||||
}
|
||||
|
||||
// KSEG1 - uncached
|
||||
MapRAM(0xA0000000);
|
||||
// MapBIOS(0xBFC00000);
|
||||
}
|
||||
|
||||
/// Returns true if the RAM page with the given index is currently flagged as containing code.
bool IsRAMCodePage(u32 index)
{
  const bool flagged_as_code = m_ram_code_bits[index];
  return flagged_as_code;
}
|
||||
|
||||
/// Flags a RAM page as containing code and write-protects it in every fastmem view.
/// Does nothing when the page is already flagged.
void SetRAMCodePage(u32 index)
{
  const bool already_flagged = m_ram_code_bits[index];
  if (!already_flagged)
  {
    // record the flag first, then protect the fastmem pages
    m_ram_code_bits[index] = true;
    SetCodePageFastmemProtection(index, false);
  }
}
|
||||
|
||||
/// Removes the code flag from a RAM page and makes it writable again in every fastmem view.
/// Does nothing when the page is not flagged.
void ClearRAMCodePage(u32 index)
{
  const bool currently_flagged = m_ram_code_bits[index];
  if (currently_flagged)
  {
    // drop the flag first, then unprotect the fastmem pages
    m_ram_code_bits[index] = false;
    SetCodePageFastmemProtection(index, true);
  }
}
|
||||
|
||||
/// Applies read-only (writable == false) or read-write (writable == true) protection
/// to one code page in each of the currently mapped fastmem RAM views.
/// Failures are logged and the remaining views are still processed.
void SetCodePageFastmemProtection(u32 page_index, bool writable)
{
  // Byte offset of the page within each RAM view.
  const auto page_offset = page_index * CPU_CODE_CACHE_PAGE_SIZE;

  for (const auto& ram_view : m_fastmem_ram_views)
  {
    u8* const page_address = static_cast<u8*>(ram_view.GetBasePointer()) + page_offset;
    const bool protection_applied =
      m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false);
    if (protection_applied)
      continue;

    Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index,
                    page_offset, page_address);
  }
}
|
||||
|
||||
void ClearRAMCodePageFlags()
|
||||
{
|
||||
m_ram_code_bits.reset();
|
||||
|
||||
// unprotect fastmem pages
|
||||
for (const auto& view : m_fastmem_ram_views)
|
||||
{
|
||||
if (!m_memory_arena.SetPageProtection(view.GetBasePointer(), view.GetMappingSize(), true, true, false))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the address lies in RAM and its page is flagged as containing code.
bool IsCodePageAddress(PhysicalMemoryAddress address)
{
  if (!IsRAMAddress(address))
    return false;

  const auto page_index = (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
  return m_ram_code_bits[page_index];
}
|
||||
|
||||
/// Returns true if any byte of [start_address, start_address + size) falls in a RAM page
/// that is flagged as containing code.
///
/// Returns false when start_address is not a RAM address or size is zero. The range is
/// clamped to the last tracked code page rather than wrapping around.
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
{
  if (!IsRAMAddress(start_address) || size == 0)
    return false;

  const u32 start_offset = (start_address & RAM_MASK);

  // Work with the offset of the last byte, in 64 bits so start + size cannot wrap.
  const u64 last_offset = static_cast<u64>(start_offset) + static_cast<u64>(size) - 1u;

  const u32 first_page = start_offset / CPU_CODE_CACHE_PAGE_SIZE;
  const u64 unclamped_last_page = last_offset / CPU_CODE_CACHE_PAGE_SIZE;

  // Never index past the end of the bitset when the range runs beyond RAM.
  const u32 max_page = static_cast<u32>(CPU_CODE_CACHE_PAGE_COUNT) - 1u;
  const u32 last_page = (unclamped_last_page > max_page) ? max_page : static_cast<u32>(unclamped_last_page);

  // Iterating page indices (not addresses) also covers the final page of a range
  // that starts in the middle of a page.
  for (u32 page = first_page; page <= last_page; page++)
  {
    if (m_ram_code_bits[page])
      return true;
  }

  return false;
}
|
||||
|
||||
|
||||
static TickCount DoInvalidAccess(MemoryAccessType type, MemoryAccessSize size, PhysicalMemoryAddress address,
|
||||
u32& value)
|
||||
{
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include "common/bitfield.h"
|
||||
#include "common/memory_arena.h"
|
||||
#include "types.h"
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
|
@ -65,26 +66,69 @@ enum : u32
|
|||
MEMCTRL_REG_COUNT = 9
|
||||
};
|
||||
|
||||
void Initialize();
|
||||
enum : TickCount
|
||||
{
|
||||
RAM_READ_TICKS = 4
|
||||
};
|
||||
|
||||
enum : size_t
|
||||
{
|
||||
// Our memory arena contains storage for RAM and BIOS.
|
||||
MEMORY_ARENA_SIZE = RAM_SIZE + BIOS_SIZE,
|
||||
|
||||
// Offsets within the memory arena.
|
||||
MEMORY_ARENA_RAM_OFFSET = 0,
|
||||
MEMORY_ARENA_BIOS_OFFSET = MEMORY_ARENA_RAM_OFFSET + RAM_SIZE,
|
||||
|
||||
// Fastmem region size is 4GB to cover the entire 32-bit address space.
|
||||
FASTMEM_REGION_SIZE = UINT64_C(0x100000000)
|
||||
};
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
void Reset();
|
||||
bool DoState(StateWrapper& sw);
|
||||
|
||||
u8* GetFastmemBase();
|
||||
bool AllocateMemory();
|
||||
void UpdateFastmemViews(bool enabled, bool isolate_cache);
|
||||
|
||||
void SetExpansionROM(std::vector<u8> data);
|
||||
void SetBIOS(const std::vector<u8>& image);
|
||||
|
||||
extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
|
||||
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
|
||||
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
|
||||
extern u8* g_ram; // 2MB RAM
|
||||
extern u8* g_bios; // 512K BIOS ROM
|
||||
|
||||
/// Returns true if the address specified is writable (RAM).
|
||||
ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
return address < RAM_MIRROR_END;
|
||||
}
|
||||
|
||||
/// Returns the code page index for a RAM address.
|
||||
ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address)
|
||||
{
|
||||
return (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
}
|
||||
|
||||
/// Returns true if the specified page contains code.
|
||||
bool IsRAMCodePage(u32 index);
|
||||
|
||||
/// Flags a RAM region as code, so we know when to invalidate blocks.
|
||||
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
|
||||
void SetRAMCodePage(u32 index);
|
||||
|
||||
/// Unflags a RAM region as code, the code cache will no longer be notified when writes occur.
|
||||
ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
|
||||
void ClearRAMCodePage(u32 index);
|
||||
|
||||
/// Clears all code bits for RAM regions.
|
||||
ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); }
|
||||
void ClearRAMCodePageFlags();
|
||||
|
||||
/// Returns true if the specified address is in a code page.
|
||||
bool IsCodePageAddress(PhysicalMemoryAddress address);
|
||||
|
||||
/// Returns true if the range specified overlaps with a code page.
|
||||
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size);
|
||||
|
||||
/// Returns the number of cycles stolen by DMA RAM access.
|
||||
ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
|
||||
|
@ -96,4 +140,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
|
|||
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
|
||||
}
|
||||
|
||||
} // namespace Bus
|
||||
} // namespace Bus
|
|
@ -5,6 +5,7 @@
|
|||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_disasm.h"
|
||||
#include "settings.h"
|
||||
#include "system.h"
|
||||
#include "timing_event.h"
|
||||
Log_SetChannel(CPU::CodeCache);
|
||||
|
@ -61,6 +62,7 @@ static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
|
|||
#endif
|
||||
|
||||
using BlockMap = std::unordered_map<u32, CodeBlock*>;
|
||||
using HostCodeMap = std::map<CodeBlock::HostCodePointer, CodeBlock*>;
|
||||
|
||||
void LogCurrentState();
|
||||
|
||||
|
@ -85,36 +87,49 @@ static void LinkBlock(CodeBlock* from, CodeBlock* to);
|
|||
/// Unlink all blocks which point to this block, and any that this block links to.
|
||||
static void UnlinkBlock(CodeBlock* block);
|
||||
|
||||
static bool s_use_recompiler = false;
|
||||
static BlockMap s_blocks;
|
||||
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
|
||||
|
||||
void Initialize(bool use_recompiler)
|
||||
#ifdef WITH_RECOMPILER
|
||||
static HostCodeMap s_host_code_map;
|
||||
|
||||
static void AddBlockToHostCodeMap(CodeBlock* block);
|
||||
static void RemoveBlockFromHostCodeMap(CodeBlock* block);
|
||||
static bool InitializeFastmem();
|
||||
static void ShutdownFastmem();
|
||||
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
|
||||
#endif
|
||||
|
||||
void Initialize()
|
||||
{
|
||||
Assert(s_blocks.empty());
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_use_recompiler = use_recompiler;
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
RECOMPILER_GUARD_SIZE))
|
||||
#else
|
||||
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
#endif
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
RECOMPILER_GUARD_SIZE))
|
||||
#else
|
||||
s_use_recompiler = false;
|
||||
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
#endif
|
||||
{
|
||||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
Panic("Failed to initialize fastmem");
|
||||
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
{
|
||||
Flush();
|
||||
ShutdownFastmem();
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_code_buffer.Destroy();
|
||||
#endif
|
||||
|
@ -279,14 +294,33 @@ void ExecuteRecompiler()
|
|||
|
||||
#endif
|
||||
|
||||
void SetUseRecompiler(bool enable)
|
||||
void Reinitialize()
|
||||
{
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (s_use_recompiler == enable)
|
||||
return;
|
||||
|
||||
s_use_recompiler = enable;
|
||||
Flush();
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
ShutdownFastmem();
|
||||
s_code_buffer.Destroy();
|
||||
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
RECOMPILER_GUARD_SIZE))
|
||||
#else
|
||||
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
#endif
|
||||
{
|
||||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
Panic("Failed to initialize fastmem");
|
||||
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -298,8 +332,10 @@ void Flush()
|
|||
|
||||
for (const auto& it : s_blocks)
|
||||
delete it.second;
|
||||
|
||||
s_blocks.clear();
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_host_code_map.clear();
|
||||
s_code_buffer.Reset();
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
|
@ -358,6 +394,8 @@ CodeBlock* LookupBlock(CodeBlockKey key)
|
|||
}
|
||||
|
||||
iter = s_blocks.emplace(key.bits, block).first;
|
||||
AddBlockToHostCodeMap(block);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
|
@ -384,6 +422,8 @@ bool RevalidateBlock(CodeBlock* block)
|
|||
return true;
|
||||
|
||||
recompile:
|
||||
RemoveBlockFromHostCodeMap(block);
|
||||
|
||||
block->instructions.clear();
|
||||
if (!CompileBlock(block))
|
||||
{
|
||||
|
@ -393,6 +433,7 @@ recompile:
|
|||
}
|
||||
|
||||
// re-add to page map again
|
||||
AddBlockToHostCodeMap(block);
|
||||
if (block->IsInRAM())
|
||||
AddBlockToPageMap(block);
|
||||
|
||||
|
@ -439,6 +480,9 @@ bool CompileBlock(CodeBlock* block)
|
|||
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
|
||||
}
|
||||
|
||||
block->contains_loadstore_instructions |= cbi.is_load_instruction;
|
||||
block->contains_loadstore_instructions |= cbi.is_store_instruction;
|
||||
|
||||
// instruction is decoded now
|
||||
block->instructions.push_back(cbi);
|
||||
pc += sizeof(cbi.instruction.bits);
|
||||
|
@ -481,7 +525,7 @@ bool CompileBlock(CodeBlock* block)
|
|||
}
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (s_use_recompiler)
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
// Ensure we're not going to run out of space while compiling this block.
|
||||
if (s_code_buffer.GetFreeCodeSpace() <
|
||||
|
@ -552,6 +596,9 @@ void FlushBlock(CodeBlock* block)
|
|||
RemoveBlockFromPageMap(block);
|
||||
|
||||
UnlinkBlock(block);
|
||||
#ifdef WITH_RECOMPILER
|
||||
RemoveBlockFromHostCodeMap(block);
|
||||
#endif
|
||||
|
||||
s_blocks.erase(iter);
|
||||
delete block;
|
||||
|
@ -613,4 +660,107 @@ void UnlinkBlock(CodeBlock* block)
|
|||
block->link_successors.clear();
|
||||
}
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
/// Registers a block in the host-code lookup map, keyed by its host code pointer.
/// No-op unless the recompiler is in use; asserts that the key was not already present.
void AddBlockToHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const auto insert_result = s_host_code_map.emplace(block->host_code, block);
    Assert(insert_result.second);
  }
}
|
||||
|
||||
/// Removes a block from the host-code lookup map.
/// No-op unless the recompiler is in use; asserts that the block was registered.
void RemoveBlockFromHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const auto entry = s_host_code_map.find(block->host_code);
    Assert(entry != s_host_code_map.end());
    s_host_code_map.erase(entry);
  }
}
|
||||
|
||||
bool InitializeFastmem()
|
||||
{
|
||||
if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to install page fault handler");
|
||||
return false;
|
||||
}
|
||||
|
||||
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShutdownFastmem()
|
||||
{
|
||||
Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
|
||||
Bus::UpdateFastmemViews(false, false);
|
||||
}
|
||||
|
||||
/// Page fault callback for the fastmem region.
///
/// Faults outside the fastmem region are passed to the next handler. A write to a RAM page
/// flagged as code invalidates the blocks on that page and resumes execution. Any other fault
/// is matched against the backpatch records of the block that precedes the faulting host PC,
/// and the matching load/store is backpatched.
///
/// @param exception_pc  Host program counter of the faulting instruction.
/// @param fault_address Host address whose access faulted.
/// @param is_write      True if the faulting access was a write.
/// @return ContinueExecution when the fault was handled, ExecuteNextHandler otherwise.
Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
  using HandlerResult = Common::PageFaultHandler::HandlerResult;

  // Not ours unless the address lies within the fastmem region.
  u8* const fault_ptr = static_cast<u8*>(fault_address);
  if (fault_ptr < g_state.fastmem_base || (fault_ptr - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE)
    return HandlerResult::ExecuteNextHandler;

  const PhysicalMemoryAddress fastmem_address =
    static_cast<PhysicalMemoryAddress>(static_cast<ptrdiff_t>(fault_ptr - g_state.fastmem_base));

  Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
                is_write ? "(write)" : "(read)", fastmem_address);

  const bool is_ram_write = is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(fastmem_address);
  if (is_ram_write)
  {
    // A write fault on mapped RAM is most likely a write-protected code page.
    const u32 page_index = Bus::GetRAMCodePageIndex(fastmem_address);
    if (Bus::IsRAMCodePage(page_index))
    {
      InvalidateBlocksWithPageIndex(page_index);
      return HandlerResult::ContinueExecution;
    }
  }

  // upper_bound yields the first block that starts after the pc...
  HostCodeMap::iterator block_iter =
    s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
  if (block_iter == s_host_code_map.begin())
    return HandlerResult::ExecuteNextHandler;

  // ...so the entry before it is (hopefully) the block containing the pc.
  --block_iter;
  CodeBlock* const block = block_iter->second;

  // Search the block's backpatch records for the faulting host instruction.
  auto& backpatch_list = block->loadstore_backpatch_info;
  for (auto entry = backpatch_list.begin(); entry != backpatch_list.end(); ++entry)
  {
    if (entry->host_pc != exception_pc)
      continue;

    const Recompiler::LoadStoreBackpatchInfo& lbi = *entry;
    if (!Recompiler::CodeGenerator::BackpatchLoadStore(lbi))
    {
      Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
      return HandlerResult::ExecuteNextHandler;
    }

    // The record is no longer needed once the instruction has been backpatched.
    backpatch_list.erase(entry);
    return HandlerResult::ContinueExecution;
  }

  // we didn't find the pc in our list..
  Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
  return HandlerResult::ExecuteNextHandler;
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace CPU::CodeCache
|
||||
|
|
|
@ -2,12 +2,18 @@
|
|||
#include "bus.h"
|
||||
#include "common/bitfield.h"
|
||||
#include "common/jit_code_buffer.h"
|
||||
#include "common/page_fault_handler.h"
|
||||
#include "cpu_types.h"
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
#include "cpu_recompiler_types.h"
|
||||
#endif
|
||||
|
||||
namespace CPU {
|
||||
|
||||
enum : u32
|
||||
|
@ -71,6 +77,12 @@ struct CodeBlock
|
|||
|
||||
TickCount uncached_fetch_ticks = 0;
|
||||
u32 icache_line_count = 0;
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
std::vector<Recompiler::LoadStoreBackpatchInfo> loadstore_backpatch_info;
|
||||
#endif
|
||||
|
||||
bool contains_loadstore_instructions = false;
|
||||
bool invalidated = false;
|
||||
|
||||
const u32 GetPC() const { return key.GetPC(); }
|
||||
|
@ -89,7 +101,7 @@ struct CodeBlock
|
|||
|
||||
namespace CodeCache {
|
||||
|
||||
void Initialize(bool use_recompiler);
|
||||
void Initialize();
|
||||
void Shutdown();
|
||||
void Execute();
|
||||
|
||||
|
@ -102,7 +114,7 @@ void ExecuteRecompiler();
|
|||
void Flush();
|
||||
|
||||
/// Changes whether the recompiler is enabled.
|
||||
void SetUseRecompiler(bool enable);
|
||||
void Reinitialize();
|
||||
|
||||
/// Invalidates all blocks which are in the range of the specified code page.
|
||||
void InvalidateBlocksWithPageIndex(u32 page_index);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "cpu_core.h"
|
||||
#include "bus.h"
|
||||
#include "common/align.h"
|
||||
#include "common/file_system.h"
|
||||
#include "common/log.h"
|
||||
|
@ -1563,6 +1564,11 @@ bool InterpretInstructionPGXP()
|
|||
return g_state.exception_raised;
|
||||
}
|
||||
|
||||
void UpdateFastmemMapping()
|
||||
{
|
||||
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
|
||||
}
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
} // namespace CPU
|
|
@ -79,6 +79,8 @@ struct State
|
|||
// GTE registers are stored here so we can access them on ARM with a single instruction
|
||||
GTE::Regs gte_regs = {};
|
||||
|
||||
u8* fastmem_base = nullptr;
|
||||
|
||||
// data cache (used as scratchpad)
|
||||
std::array<u8, DCACHE_SIZE> dcache = {};
|
||||
std::array<u32, ICACHE_LINES> icache_tags = {};
|
||||
|
|
|
@ -19,8 +19,7 @@ u32 CodeGenerator::CalculateRegisterOffset(Reg reg)
|
|||
return u32(offsetof(State, regs.r[0]) + (static_cast<u32>(reg) * sizeof(u32)));
|
||||
}
|
||||
|
||||
bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code,
|
||||
u32* out_host_code_size)
|
||||
bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
|
||||
{
|
||||
// TODO: Align code buffer.
|
||||
|
||||
|
@ -40,8 +39,10 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
|||
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
|
||||
#endif
|
||||
|
||||
m_current_instruction = cbi;
|
||||
if (!CompileInstruction(*cbi))
|
||||
{
|
||||
m_current_instruction = nullptr;
|
||||
m_block_end = nullptr;
|
||||
m_block_start = nullptr;
|
||||
m_block = nullptr;
|
||||
|
@ -60,6 +61,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
|||
|
||||
DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);
|
||||
|
||||
m_current_instruction = nullptr;
|
||||
m_block_end = nullptr;
|
||||
m_block_start = nullptr;
|
||||
m_block = nullptr;
|
||||
|
@ -1895,7 +1897,22 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
|
|||
value = AndValues(value, Value::FromConstantU32(write_mask));
|
||||
}
|
||||
|
||||
EmitStoreCPUStructField(offset, value);
|
||||
// changing SR[Isc] needs to update fastmem views
|
||||
if (reg == Cop0Reg::SR && g_settings.cpu_fastmem)
|
||||
{
|
||||
LabelType skip_fastmem_update;
|
||||
Value old_value = m_register_cache.AllocateScratch(RegSize_32);
|
||||
EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
|
||||
EmitStoreCPUStructField(offset, value);
|
||||
EmitXor(old_value.host_reg, old_value.host_reg, value);
|
||||
EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_fastmem_update);
|
||||
EmitFunctionCall(nullptr, &Thunks::UpdateFastmemMapping, m_register_cache.GetCPUPtr());
|
||||
EmitBindLabel(&skip_fastmem_update);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitStoreCPUStructField(offset, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,9 @@ public:
|
|||
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
|
||||
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
|
||||
|
||||
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
|
||||
|
||||
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
|
||||
CodeBlock::HostCodePointer CompileDispatcher();
|
||||
|
||||
|
@ -73,7 +75,11 @@ public:
|
|||
|
||||
// Automatically generates an exception handler.
|
||||
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
|
||||
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
|
||||
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code);
|
||||
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
|
||||
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
|
||||
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code);
|
||||
|
||||
// Unconditional branch to pointer. May allocate a scratch register.
|
||||
void EmitBranch(const void* address, bool allow_scratch = true);
|
||||
|
@ -204,9 +210,10 @@ private:
|
|||
bool Compile_cop2(const CodeBlockInstruction& cbi);
|
||||
|
||||
JitCodeBuffer* m_code_buffer;
|
||||
const CodeBlock* m_block = nullptr;
|
||||
CodeBlock* m_block = nullptr;
|
||||
const CodeBlockInstruction* m_block_start = nullptr;
|
||||
const CodeBlockInstruction* m_block_end = nullptr;
|
||||
const CodeBlockInstruction* m_current_instruction = nullptr;
|
||||
RegisterCache m_register_cache;
|
||||
CodeEmitter m_near_emitter;
|
||||
CodeEmitter m_far_emitter;
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace a64 = vixl::aarch64;
|
|||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 19;
|
||||
constexpr HostReg RMEMBASEPTR = 20;
|
||||
constexpr HostReg RRETURN = 0;
|
||||
constexpr HostReg RARG1 = 0;
|
||||
constexpr HostReg RARG2 = 1;
|
||||
|
@ -86,6 +87,11 @@ static const a64::XRegister GetCPUPtrReg()
|
|||
return GetHostReg64(RCPUPTR);
|
||||
}
|
||||
|
||||
/// Returns the 64-bit host register (RMEMBASEPTR) used for the fastmem base pointer.
static const a64::XRegister GetFastmemBasePtrReg()
{
  const a64::XRegister fastmem_base_reg = GetHostReg64(RMEMBASEPTR);
  return fastmem_base_reg;
}
|
||||
|
||||
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
|
||||
: m_code_buffer(code_buffer), m_register_cache(*this),
|
||||
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
|
||||
|
@ -188,10 +194,21 @@ void CodeGenerator::EmitBeginBlock()
|
|||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
{
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
|
||||
|
@ -1308,12 +1325,105 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
|
||||
AddPendingCycles(true);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
a64::MemOperand actual_address;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
|
||||
}
|
||||
|
||||
// TODO: movsx/zx inline here
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
// We need to use the full 64 bits here since we test the sign bit result.
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
// NOTE: This can leave junk in the upper bits
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1342,7 +1452,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->Bind(&load_okay);
|
||||
|
||||
// load exception path
|
||||
SwitchToFarCode();
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (-result << 2) | BD | cop_n
|
||||
m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg));
|
||||
|
@ -1353,37 +1464,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
SwitchToNearCode();
|
||||
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1402,27 +1490,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1443,11 +1510,87 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
// we need the value in a host register to store it
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
EmitStoreGuestMemoryFastmem(cbi, address, value_in_hr);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
a64::MemOperand actual_address;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH));
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
|
||||
}
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->Strb(GetHostReg8(value), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->Strh(GetHostReg16(value), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Str(GetHostReg32(value), actual_address);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value, bool in_far_code)
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
Assert(!in_far_code);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1475,7 +1618,8 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->Bind(&store_okay);
|
||||
|
||||
// store exception path
|
||||
SwitchToFarCode();
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (result << 2) | BD | cop_n
|
||||
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
|
||||
|
@ -1484,15 +1628,14 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
if (!in_far_code)
|
||||
EmitExceptionExit();
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1514,6 +1657,30 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
|
||||
|
||||
// check jump distance
|
||||
const s64 jump_distance =
|
||||
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.host_slowmem_pc) - reinterpret_cast<intptr_t>(lbi.host_pc));
|
||||
Assert(Common::IsAligned(jump_distance, 4));
|
||||
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size,
|
||||
a64::PositionDependentCode);
|
||||
emit.b(jump_distance >> 2);
|
||||
|
||||
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
EmitLoadGlobalAddress(RSCRATCH, ptr);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
|
@ -12,6 +13,7 @@ namespace CPU::Recompiler {
|
|||
|
||||
#if defined(ABI_WIN64)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RCX;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RDX;
|
||||
|
@ -21,6 +23,7 @@ constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
|
|||
constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16;
|
||||
#elif defined(ABI_SYSV)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RDI;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RSI;
|
||||
|
@ -79,6 +82,11 @@ static const Xbyak::Reg64 GetCPUPtrReg()
|
|||
return GetHostReg64(RCPUPTR);
|
||||
}
|
||||
|
||||
// Returns the 64-bit view of RMEMBASEPTR, the host register holding the fastmem base pointer.
static const Xbyak::Reg64 GetFastmemBasePtrReg()
{
  const Xbyak::Reg64 fastmem_base_reg = GetHostReg64(RMEMBASEPTR);
  return fastmem_base_reg;
}
|
||||
|
||||
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
|
||||
: m_code_buffer(code_buffer), m_register_cache(*this),
|
||||
m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
|
||||
|
@ -140,7 +148,6 @@ void CodeGenerator::InitHostRegs()
|
|||
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
|
||||
Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
|
||||
Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
#elif defined(ABI_SYSV)
|
||||
m_register_cache.SetHostRegAllocationOrder(
|
||||
{Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
|
||||
|
@ -154,8 +161,9 @@ void CodeGenerator::InitHostRegs()
|
|||
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
|
||||
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
|
||||
Xbyak::Operand::R15});
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
#endif
|
||||
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
}
|
||||
|
||||
void CodeGenerator::SwitchToFarCode()
|
||||
|
@ -196,11 +204,22 @@ void CodeGenerator::EmitBeginBlock()
|
|||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
|
||||
m_emit->ret();
|
||||
|
@ -1762,12 +1781,139 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
|
||||
AddPendingCycles(true);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
// can't store displacements > 0x80000000 in-line
|
||||
const Value* actual_address = &address;
|
||||
if (address.IsConstant() && address.constant_value >= 0x80000000)
|
||||
{
|
||||
actual_address = &result;
|
||||
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
// TODO: movsx/zx inline here
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg8(result.host_reg),
|
||||
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg8(result.host_reg),
|
||||
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg16(result.host_reg),
|
||||
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg16(result.host_reg),
|
||||
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg32(result.host_reg),
|
||||
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg32(result.host_reg),
|
||||
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: BIOS reads...
|
||||
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
// We need to use the full 64 bits here since we test the sign bit result.
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
// NOTE: This can leave junk in the upper bits
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1794,7 +1940,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_register_cache.PushState();
|
||||
|
||||
// load exception path
|
||||
SwitchToFarCode();
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (-result << 2) | BD | cop_n
|
||||
m_emit->neg(GetHostReg32(result.host_reg));
|
||||
|
@ -1805,37 +1952,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
SwitchToNearCode();
|
||||
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1854,27 +1978,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1895,11 +1998,163 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
EmitStoreGuestMemoryFastmem(cbi, address, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
// can't store displacements > 0x80000000 in-line
|
||||
const Value* actual_address = &address;
|
||||
Value temp_address;
|
||||
if (address.IsConstant() && address.constant_value >= 0x80000000)
|
||||
{
|
||||
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
|
||||
actual_address = &temp_address;
|
||||
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg8(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg8(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg16(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg16(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg32(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg32(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
Assert(!in_far_code);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1925,24 +2180,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->jnz(GetCurrentFarCodePointer());
|
||||
|
||||
// store exception path
|
||||
SwitchToFarCode();
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (result << 2) | BD | cop_n
|
||||
m_emit->shl(GetHostReg32(result.host_reg), 2);
|
||||
m_emit->or_(GetHostReg32(result.host_reg),
|
||||
m_emit->shl(GetHostReg32(result), 2);
|
||||
m_emit->or_(GetHostReg32(result),
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
|
||||
cbi.instruction.cop.cop_n));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
SwitchToNearCode();
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1964,6 +2219,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
|
||||
cg.jmp(lbi.host_slowmem_pc);
|
||||
|
||||
const s32 nops = static_cast<s32>(lbi.host_code_size) -
|
||||
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(lbi.host_pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
const s64 displacement =
|
||||
|
|
|
@ -32,6 +32,7 @@ void UncheckedWriteMemoryByte(u32 address, u8 value);
|
|||
void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
|
||||
void UncheckedWriteMemoryWord(u32 address, u32 value);
|
||||
|
||||
void UpdateFastmemMapping();
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
|
|
|
@ -127,6 +127,16 @@ constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = false;
|
|||
|
||||
#endif
|
||||
|
||||
struct LoadStoreBackpatchInfo
|
||||
{
|
||||
void* host_pc; // pointer to instruction which will fault
|
||||
void* host_slowmem_pc; // pointer to slowmem callback code
|
||||
u32 host_code_size; // size of the fastmem load as well as the add for cycles
|
||||
HostReg address_host_reg; // register containing the guest address to load/store
|
||||
HostReg value_host_reg; // register containing the source/destination
|
||||
PhysicalMemoryAddress guest_pc;
|
||||
};
|
||||
|
||||
} // namespace Recompiler
|
||||
|
||||
} // namespace CPU
|
||||
|
|
|
@ -366,6 +366,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
|
|||
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
si.SetBoolValue("CPU", "ICache", false);
|
||||
si.SetBoolValue("CPU", "Fastmem", true);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
|
||||
si.SetIntValue("GPU", "ResolutionScale", 1);
|
||||
|
@ -512,12 +513,13 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
|||
if (g_settings.emulation_speed != old_settings.emulation_speed)
|
||||
System::UpdateThrottlePeriod();
|
||||
|
||||
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode)
|
||||
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode ||
|
||||
g_settings.cpu_fastmem != old_settings.cpu_fastmem)
|
||||
{
|
||||
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
|
||||
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
|
||||
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
CPU::CodeCache::Flush();
|
||||
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode%s.",
|
||||
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode),
|
||||
g_settings.cpu_fastmem ? " (fastmem)" : "");
|
||||
CPU::CodeCache::Reinitialize();
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
|
|
|
@ -96,6 +96,7 @@ void Settings::Load(SettingsInterface& si)
|
|||
.value_or(DEFAULT_CPU_EXECUTION_MODE);
|
||||
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
|
||||
cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true);
|
||||
|
||||
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
|
||||
.value_or(DEFAULT_GPU_RENDERER);
|
||||
|
@ -217,6 +218,7 @@ void Settings::Save(SettingsInterface& si) const
|
|||
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
|
||||
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
|
||||
si.SetBoolValue("CPU", "Fastmem", cpu_fastmem);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
|
||||
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());
|
||||
|
|
|
@ -72,6 +72,7 @@ struct Settings
|
|||
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
|
||||
bool cpu_recompiler_memory_exceptions = false;
|
||||
bool cpu_recompiler_icache = false;
|
||||
bool cpu_fastmem = true;
|
||||
|
||||
float emulation_speed = 1.0f;
|
||||
bool speed_limiter_enabled = true;
|
||||
|
@ -172,6 +173,11 @@ struct Settings
|
|||
return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled;
|
||||
}
|
||||
|
||||
// Returns true when fastmem should be used: the fastmem setting is on, the CPU is running in recompiler mode, and
// recompiler memory exceptions are disabled.
ALWAYS_INLINE bool IsUsingFastmem() const
{
  if (!cpu_fastmem || cpu_recompiler_memory_exceptions)
    return false;

  return (cpu_execution_mode == CPUExecutionMode::Recompiler);
}
|
||||
|
||||
bool HasAnyPerGameMemoryCards() const;
|
||||
|
||||
enum : u32
|
||||
|
|
|
@ -708,14 +708,16 @@ bool Initialize(bool force_software_renderer)
|
|||
TimingEvents::Initialize();
|
||||
|
||||
CPU::Initialize();
|
||||
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
Bus::Initialize();
|
||||
|
||||
if (!Bus::Initialize())
|
||||
return false;
|
||||
|
||||
CPU::CodeCache::Initialize();
|
||||
|
||||
if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer))
|
||||
return false;
|
||||
|
||||
g_dma.Initialize();
|
||||
|
||||
g_interrupt_controller.Initialize();
|
||||
|
||||
g_cdrom.Initialize();
|
||||
|
|
|
@ -129,6 +129,6 @@ enum : u32
|
|||
|
||||
enum : u32
|
||||
{
|
||||
CPU_CODE_CACHE_PAGE_SIZE = 1024,
|
||||
CPU_CODE_CACHE_PAGE_SIZE = 4096,
|
||||
CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue