WIP fastmem

Connor McLaughlin 2020-09-06 21:07:09 +10:00
parent 3dd717aca8
commit 5e45330703
19 changed files with 1070 additions and 179 deletions

View File

@ -3,6 +3,7 @@
#include <algorithm>
#include <cstring>
#include <mutex>
#include <vector>
Log_SetChannel(Common::PageFaultHandler);
#if defined(WIN32)

View File

@ -1,6 +1,5 @@
#pragma once
#include "types.h"
#include <functional>
namespace Common::PageFaultHandler {
enum class HandlerResult
@ -9,7 +8,7 @@ enum class HandlerResult
ExecuteNextHandler,
};
using Callback = std::function<HandlerResult(void* exception_pc, void* fault_address, bool is_write)>;
using Callback = HandlerResult(*)(void* exception_pc, void* fault_address, bool is_write);
using Handle = void*;
bool InstallHandler(void* owner, Callback callback);

View File

@ -10,6 +10,7 @@
#include "cpu_disasm.h"
#include "dma.h"
#include "gpu.h"
#include "host_interface.h"
#include "interrupt_controller.h"
#include "mdec.h"
#include "pad.h"
@ -22,11 +23,6 @@ Log_SetChannel(Bus);
namespace Bus {
enum : TickCount
{
RAM_READ_TICKS = 4
};
union MEMDELAY
{
u32 bits;
@ -74,8 +70,8 @@ union MEMCTRL
};
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
u8 g_ram[RAM_SIZE]{}; // 2MB RAM
u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
u8* g_ram = nullptr; // 2MB RAM
u8* g_bios = nullptr; // 512K BIOS ROM
static std::array<TickCount, 3> m_exp1_access_time = {};
static std::array<TickCount, 3> m_exp2_access_time = {};
@ -90,9 +86,15 @@ static u32 m_ram_size_reg = 0;
static std::string m_tty_line_buffer;
static Common::MemoryArena m_memory_arena;
static u8* m_fastmem_base = nullptr;
static std::vector<Common::MemoryArena::View> m_fastmem_ram_views;
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
static void RecalculateMemoryTimings();
static void SetCodePageFastmemProtection(u32 page_index, bool writable);
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u))
#define FIXUP_HALFWORD_READ_OFFSET(offset) ((offset) & ~u32(1))
@ -108,19 +110,32 @@ ALWAYS_INLINE static void FixupUnalignedWordAccessW32(u32& offset, u32& value)
value <<= byte_offset * 8;
}
void Initialize()
bool Initialize()
{
if (!AllocateMemory())
{
g_host_interface->ReportError("Failed to allocate memory");
return false;
}
Reset();
return true;
}
void Shutdown()
{
// release the fastmem views before tearing down the base RAM/BIOS mappings
m_fastmem_ram_views.clear();
if (g_ram)
m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE);
if (g_bios)
m_memory_arena.ReleaseViewPtr(g_bios, BIOS_SIZE);
CPU::g_state.fastmem_base = nullptr;
}
void Reset()
{
std::memset(g_ram, 0, sizeof(g_ram));
std::memset(g_ram, 0, RAM_SIZE);
m_MEMCTRL.exp1_base = 0x1F000000;
m_MEMCTRL.exp2_base = 0x1F802000;
m_MEMCTRL.exp1_delay_size.bits = 0x0013243F;
@ -142,8 +157,8 @@ bool DoState(StateWrapper& sw)
sw.Do(&m_bios_access_time);
sw.Do(&m_cdrom_access_time);
sw.Do(&m_spu_access_time);
sw.DoBytes(g_ram, sizeof(g_ram));
sw.DoBytes(g_bios, sizeof(g_bios));
sw.DoBytes(g_ram, RAM_SIZE);
sw.DoBytes(g_bios, BIOS_SIZE);
sw.DoArray(m_MEMCTRL.regs, countof(m_MEMCTRL.regs));
sw.Do(&m_ram_size_reg);
sw.Do(&m_tty_line_buffer);
@ -222,6 +237,181 @@ void RecalculateMemoryTimings()
m_spu_access_time[2] + 1);
}
bool AllocateMemory()
{
if (!m_memory_arena.Create(MEMORY_ARENA_SIZE, true, false))
{
Log_ErrorPrint("Failed to create memory arena");
return false;
}
// Create the base views.
g_ram = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false));
g_bios = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, true, false));
if (!g_ram || !g_bios)
{
Log_ErrorPrint("Failed to create base views of memory");
return false;
}
return true;
}
void UpdateFastmemViews(bool enabled, bool isolate_cache)
{
m_fastmem_ram_views.clear();
if (!enabled)
{
m_fastmem_base = nullptr;
return;
}
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false");
if (!m_fastmem_base)
{
m_fastmem_base = static_cast<u8*>(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE));
if (!m_fastmem_base)
{
Log_ErrorPrint("Failed to find base address for fastmem");
return;
}
Log_InfoPrintf("Fastmem base: %p", m_fastmem_base);
CPU::g_state.fastmem_base = m_fastmem_base;
}
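// Each MapRAM/MapBIOS call below creates another view of the same arena
// pages, so the cached and uncached mirrors all alias one physical copy.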
auto MapRAM = [](u32 base_address) {
u8* map_address = m_fastmem_base + base_address;
auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false, map_address);
if (!view)
{
Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
return;
}
// mark all pages with code as non-writable
for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++)
{
if (m_ram_code_bits[i])
{
u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false))
{
Log_ErrorPrintf("Failed to write-protect code page at %p");
return;
}
}
}
m_fastmem_ram_views.push_back(std::move(view.value()));
};
auto MapBIOS = [](u32 base_address) {
u8* map_address = m_fastmem_base + base_address;
auto view = m_memory_arena.CreateView(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, false, false, map_address);
if (!view)
{
Log_ErrorPrintf("Failed to map BIOS at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
return;
}
m_fastmem_ram_views.push_back(std::move(view.value()));
};
if (!isolate_cache)
{
// KUSEG - cached
MapRAM(0x00000000);
// MapBIOS(0x1FC00000);
// KSEG0 - cached
MapRAM(0x80000000);
// MapBIOS(0x9FC00000);
}
// KSEG1 - uncached
MapRAM(0xA0000000);
// MapBIOS(0xBFC00000);
}
bool IsRAMCodePage(u32 index)
{
return m_ram_code_bits[index];
}
void SetRAMCodePage(u32 index)
{
if (m_ram_code_bits[index])
return;
// protect fastmem pages
m_ram_code_bits[index] = true;
SetCodePageFastmemProtection(index, false);
}
void ClearRAMCodePage(u32 index)
{
if (!m_ram_code_bits[index])
return;
// unprotect fastmem pages
m_ram_code_bits[index] = false;
SetCodePageFastmemProtection(index, true);
}
void SetCodePageFastmemProtection(u32 page_index, bool writable)
{
// update the protection on this code page in every mapped fastmem view
for (const auto& view : m_fastmem_ram_views)
{
u8* page_address = static_cast<u8*>(view.GetBasePointer()) + (page_index * CPU_CODE_CACHE_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false))
{
Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index,
page_index * CPU_CODE_CACHE_PAGE_SIZE, page_address);
}
}
}
void ClearRAMCodePageFlags()
{
m_ram_code_bits.reset();
// unprotect fastmem pages
for (const auto& view : m_fastmem_ram_views)
{
if (!m_memory_arena.SetPageProtection(view.GetBasePointer(), view.GetMappingSize(), true, true, false))
{
Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer());
}
}
}
bool IsCodePageAddress(PhysicalMemoryAddress address)
{
return IsRAMAddress(address) ? m_ram_code_bits[(address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE] : false;
}
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
{
if (!IsRAMAddress(start_address))
return false;
start_address = (start_address & RAM_MASK);
const u32 end_address = start_address + size;
while (start_address < end_address)
{
const u32 code_page_index = start_address / CPU_CODE_CACHE_PAGE_SIZE;
if (m_ram_code_bits[code_page_index])
return true;
start_address += CPU_CODE_CACHE_PAGE_SIZE;
}
return false;
}
static TickCount DoInvalidAccess(MemoryAccessType type, MemoryAccessSize size, PhysicalMemoryAddress address,
u32& value)
{

View File

@ -1,5 +1,6 @@
#pragma once
#include "common/bitfield.h"
#include "common/memory_arena.h"
#include "types.h"
#include <array>
#include <bitset>
@ -65,26 +66,69 @@ enum : u32
MEMCTRL_REG_COUNT = 9
};
void Initialize();
enum : TickCount
{
RAM_READ_TICKS = 4
};
enum : size_t
{
// Our memory arena contains storage for RAM and BIOS.
MEMORY_ARENA_SIZE = RAM_SIZE + BIOS_SIZE,
// Offsets within the memory arena.
MEMORY_ARENA_RAM_OFFSET = 0,
MEMORY_ARENA_BIOS_OFFSET = MEMORY_ARENA_RAM_OFFSET + RAM_SIZE,
// Fastmem region size is 4GB to cover the entire 32-bit address space.
FASTMEM_REGION_SIZE = UINT64_C(0x100000000)
};
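A minimal sketch (not part of this commit) of what the 4GB region buys: the guest's 32-bit address is used directly as an offset from the fastmem base, with no masking or bounds check, because anything unmapped simply faults and gets backpatched to the slow path. ReadWordThroughFastmem is a hypothetical helper; it assumes <cstring> and the project's u8/u32 typedefs, and omits the cycle accounting the real recompiler emits.
// Illustration only, assuming the region layout above.
u32 ReadWordThroughFastmem(const u8* fastmem_base, u32 guest_address)
{
u32 value;
std::memcpy(&value, fastmem_base + guest_address, sizeof(value)); // faults if unmapped
return value;
}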
bool Initialize();
void Shutdown();
void Reset();
bool DoState(StateWrapper& sw);
u8* GetFastmemBase();
bool AllocateMemory();
void UpdateFastmemViews(bool enabled, bool isolate_cache);
void SetExpansionROM(std::vector<u8> data);
void SetBIOS(const std::vector<u8>& image);
extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
extern u8* g_ram; // 2MB RAM
extern u8* g_bios; // 512K BIOS ROM
/// Returns true if the address specified is writable (RAM).
ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address)
{
return address < RAM_MIRROR_END;
}
/// Returns the code page index for a RAM address.
ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address)
{
return (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
}
/// Returns true if the specified page contains code.
bool IsRAMCodePage(u32 index);
/// Flags a RAM region as code, so we know when to invalidate blocks.
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
void SetRAMCodePage(u32 index);
/// Unflags a RAM region as code, the code cache will no longer be notified when writes occur.
ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
void ClearRAMCodePage(u32 index);
/// Clears all code bits for RAM regions.
ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); }
void ClearRAMCodePageFlags();
/// Returns true if the specified address is in a code page.
bool IsCodePageAddress(PhysicalMemoryAddress address);
/// Returns true if the range specified overlaps with a code page.
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size);
/// Returns the number of cycles stolen by DMA RAM access.
ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
@ -96,4 +140,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
} // namespace Bus
} // namespace Bus

View File

@ -5,6 +5,7 @@
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"
Log_SetChannel(CPU::CodeCache);
@ -61,6 +62,7 @@ static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
#endif
using BlockMap = std::unordered_map<u32, CodeBlock*>;
using HostCodeMap = std::map<CodeBlock::HostCodePointer, CodeBlock*>;
void LogCurrentState();
@ -85,36 +87,49 @@ static void LinkBlock(CodeBlock* from, CodeBlock* to);
/// Unlink all blocks which point to this block, and any that this block links to.
static void UnlinkBlock(CodeBlock* block);
static bool s_use_recompiler = false;
static BlockMap s_blocks;
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
void Initialize(bool use_recompiler)
#ifdef WITH_RECOMPILER
static HostCodeMap s_host_code_map;
static void AddBlockToHostCodeMap(CodeBlock* block);
static void RemoveBlockFromHostCodeMap(CodeBlock* block);
static bool InitializeFastmem();
static void ShutdownFastmem();
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
#endif
void Initialize()
{
Assert(s_blocks.empty());
#ifdef WITH_RECOMPILER
s_use_recompiler = use_recompiler;
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
#else
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
#endif
if (g_settings.IsUsingRecompiler())
{
Panic("Failed to initialize code space");
}
ResetFastMap();
CompileDispatcher();
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
#else
s_use_recompiler = false;
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
#endif
{
Panic("Failed to initialize code space");
}
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
ResetFastMap();
CompileDispatcher();
}
#endif
}
void Shutdown()
{
Flush();
ShutdownFastmem();
#ifdef WITH_RECOMPILER
s_code_buffer.Destroy();
#endif
@ -279,14 +294,33 @@ void ExecuteRecompiler()
#endif
void SetUseRecompiler(bool enable)
void Reinitialize()
{
#ifdef WITH_RECOMPILER
if (s_use_recompiler == enable)
return;
s_use_recompiler = enable;
Flush();
#ifdef WITH_RECOMPILER
ShutdownFastmem();
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
#else
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
#endif
{
Panic("Failed to initialize code space");
}
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
ResetFastMap();
CompileDispatcher();
}
#endif
}
@ -298,8 +332,10 @@ void Flush()
for (const auto& it : s_blocks)
delete it.second;
s_blocks.clear();
#ifdef WITH_RECOMPILER
s_host_code_map.clear();
s_code_buffer.Reset();
ResetFastMap();
CompileDispatcher();
@ -358,6 +394,8 @@ CodeBlock* LookupBlock(CodeBlockKey key)
}
iter = s_blocks.emplace(key.bits, block).first;
AddBlockToHostCodeMap(block);
return block;
}
@ -384,6 +422,8 @@ bool RevalidateBlock(CodeBlock* block)
return true;
recompile:
RemoveBlockFromHostCodeMap(block);
block->instructions.clear();
if (!CompileBlock(block))
{
@ -393,6 +433,7 @@ recompile:
}
// re-add to page map again
AddBlockToHostCodeMap(block);
if (block->IsInRAM())
AddBlockToPageMap(block);
@ -439,6 +480,9 @@ bool CompileBlock(CodeBlock* block)
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
}
block->contains_loadstore_instructions |= cbi.is_load_instruction;
block->contains_loadstore_instructions |= cbi.is_store_instruction;
// instruction is decoded now
block->instructions.push_back(cbi);
pc += sizeof(cbi.instruction.bits);
@ -481,7 +525,7 @@ bool CompileBlock(CodeBlock* block)
}
#ifdef WITH_RECOMPILER
if (s_use_recompiler)
if (g_settings.IsUsingRecompiler())
{
// Ensure we're not going to run out of space while compiling this block.
if (s_code_buffer.GetFreeCodeSpace() <
@ -552,6 +596,9 @@ void FlushBlock(CodeBlock* block)
RemoveBlockFromPageMap(block);
UnlinkBlock(block);
#ifdef WITH_RECOMPILER
RemoveBlockFromHostCodeMap(block);
#endif
s_blocks.erase(iter);
delete block;
@ -613,4 +660,107 @@ void UnlinkBlock(CodeBlock* block)
block->link_successors.clear();
}
#ifdef WITH_RECOMPILER
void AddBlockToHostCodeMap(CodeBlock* block)
{
if (!g_settings.IsUsingRecompiler())
return;
auto ir = s_host_code_map.emplace(block->host_code, block);
Assert(ir.second);
}
void RemoveBlockFromHostCodeMap(CodeBlock* block)
{
if (!g_settings.IsUsingRecompiler())
return;
HostCodeMap::iterator hc_iter = s_host_code_map.find(block->host_code);
Assert(hc_iter != s_host_code_map.end());
s_host_code_map.erase(hc_iter);
}
bool InitializeFastmem()
{
if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler))
{
Log_ErrorPrintf("Failed to install page fault handler");
return false;
}
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
return true;
}
void ShutdownFastmem()
{
Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
Bus::UpdateFastmemViews(false, false);
}
Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
if (static_cast<u8*>(fault_address) < g_state.fastmem_base ||
(static_cast<u8*>(fault_address) - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE)
{
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
const PhysicalMemoryAddress fastmem_address =
static_cast<PhysicalMemoryAddress>(static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - g_state.fastmem_base));
Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
is_write ? "(write)" : "(read)", fastmem_address);
if (is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(fastmem_address))
{
// this is probably a code page, since ordinary RAM writes through fastmem shouldn't fault
const u32 code_page_index = Bus::GetRAMCodePageIndex(fastmem_address);
if (Bus::IsRAMCodePage(code_page_index))
{
InvalidateBlocksWithPageIndex(code_page_index);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
}
// use upper_bound to find the next block after the pc
HostCodeMap::iterator upper_iter =
s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
if (upper_iter == s_host_code_map.begin())
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
// then decrement it by one to (hopefully) get the block we want
upper_iter--;
// find the loadstore info in the code block
CodeBlock* block = upper_iter->second;
for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end();
++bpi_iter)
{
const Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter;
if (lbi.host_pc == exception_pc)
{
// found it, do fixup
if (Recompiler::CodeGenerator::BackpatchLoadStore(lbi))
{
// remove the backpatch entry since we won't be coming back to this one
block->loadstore_backpatch_info.erase(bpi_iter);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
else
{
Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
}
}
// we didn't find the PC in our list...
Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
#endif
} // namespace CPU::CodeCache

View File

@ -2,12 +2,18 @@
#include "bus.h"
#include "common/bitfield.h"
#include "common/jit_code_buffer.h"
#include "common/page_fault_handler.h"
#include "cpu_types.h"
#include <array>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
#ifdef WITH_RECOMPILER
#include "cpu_recompiler_types.h"
#endif
namespace CPU {
enum : u32
@ -71,6 +77,12 @@ struct CodeBlock
TickCount uncached_fetch_ticks = 0;
u32 icache_line_count = 0;
#ifdef WITH_RECOMPILER
std::vector<Recompiler::LoadStoreBackpatchInfo> loadstore_backpatch_info;
#endif
bool contains_loadstore_instructions = false;
bool invalidated = false;
const u32 GetPC() const { return key.GetPC(); }
@ -89,7 +101,7 @@ struct CodeBlock
namespace CodeCache {
void Initialize(bool use_recompiler);
void Initialize();
void Shutdown();
void Execute();
@ -102,7 +114,7 @@ void ExecuteRecompiler();
void Flush();
/// Changes whether the recompiler is enabled.
void SetUseRecompiler(bool enable);
void Reinitialize();
/// Invalidates all blocks which are in the range of the specified code page.
void InvalidateBlocksWithPageIndex(u32 page_index);

View File

@ -1,4 +1,5 @@
#include "cpu_core.h"
#include "bus.h"
#include "common/align.h"
#include "common/file_system.h"
#include "common/log.h"
@ -1563,6 +1564,11 @@ bool InterpretInstructionPGXP()
return g_state.exception_raised;
}
void UpdateFastmemMapping()
{
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
}
} // namespace Recompiler::Thunks
} // namespace CPU

View File

@ -79,6 +79,8 @@ struct State
// GTE registers are stored here so we can access them on ARM with a single instruction
GTE::Regs gte_regs = {};
u8* fastmem_base = nullptr;
// data cache (used as scratchpad)
std::array<u8, DCACHE_SIZE> dcache = {};
std::array<u32, ICACHE_LINES> icache_tags = {};

View File

@ -19,8 +19,7 @@ u32 CodeGenerator::CalculateRegisterOffset(Reg reg)
return u32(offsetof(State, regs.r[0]) + (static_cast<u32>(reg) * sizeof(u32)));
}
bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code,
u32* out_host_code_size)
bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
{
// TODO: Align code buffer.
@ -40,8 +39,10 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif
m_current_instruction = cbi;
if (!CompileInstruction(*cbi))
{
m_current_instruction = nullptr;
m_block_end = nullptr;
m_block_start = nullptr;
m_block = nullptr;
@ -60,6 +61,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);
m_current_instruction = nullptr;
m_block_end = nullptr;
m_block_start = nullptr;
m_block = nullptr;
@ -1895,7 +1897,22 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
value = AndValues(value, Value::FromConstantU32(write_mask));
}
EmitStoreCPUStructField(offset, value);
// changing SR[Isc] needs to update fastmem views
if (reg == Cop0Reg::SR && g_settings.cpu_fastmem)
{
LabelType skip_fastmem_update;
Value old_value = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
EmitStoreCPUStructField(offset, value);
EmitXor(old_value.host_reg, old_value.host_reg, value);
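// After the XOR, bit 16 of SR (Isc) is set only if the write toggled it,
// so the branch below skips the remap thunk when it is unchanged.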
EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_fastmem_update);
EmitFunctionCall(nullptr, &Thunks::UpdateFastmemMapping, m_register_cache.GetCPUPtr());
EmitBindLabel(&skip_fastmem_update);
}
else
{
EmitStoreCPUStructField(offset, value);
}
}
}

View File

@ -23,7 +23,9 @@ public:
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
CodeBlock::HostCodePointer CompileDispatcher();
@ -73,7 +75,11 @@ public:
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code);
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code);
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
@ -204,9 +210,10 @@ private:
bool Compile_cop2(const CodeBlockInstruction& cbi);
JitCodeBuffer* m_code_buffer;
const CodeBlock* m_block = nullptr;
CodeBlock* m_block = nullptr;
const CodeBlockInstruction* m_block_start = nullptr;
const CodeBlockInstruction* m_block_end = nullptr;
const CodeBlockInstruction* m_current_instruction = nullptr;
RegisterCache m_register_cache;
CodeEmitter m_near_emitter;
CodeEmitter m_far_emitter;

View File

@ -14,6 +14,7 @@ namespace a64 = vixl::aarch64;
namespace CPU::Recompiler {
constexpr HostReg RCPUPTR = 19;
constexpr HostReg RMEMBASEPTR = 20;
constexpr HostReg RRETURN = 0;
constexpr HostReg RARG1 = 0;
constexpr HostReg RARG2 = 1;
@ -86,6 +87,11 @@ static const a64::XRegister GetCPUPtrReg()
return GetHostReg64(RCPUPTR);
}
static const a64::XRegister GetFastmemBasePtrReg()
{
return GetHostReg64(RMEMBASEPTR);
}
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
@ -188,10 +194,21 @@ void CodeGenerator::EmitBeginBlock()
// Store the CPU struct pointer. TODO: make this better.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
// If there's loadstore instructions, preload the fastmem base.
if (m_block->contains_loadstore_instructions)
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
}
}
void CodeGenerator::EmitEndBlock()
{
if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.PopCalleeSavedRegisters(true);
@ -1308,12 +1325,105 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
Value result = m_register_cache.AllocateScratch(RegSize_64);
if (g_settings.IsUsingFastmem())
{
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
bpi.host_pc = GetCurrentNearCodePointer();
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
break;
case RegSize_16:
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
break;
case RegSize_32:
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
break;
default:
UnreachableCode();
break;
}
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
// NOTE: This can leave junk in the upper bits
switch (size)
{
@ -1342,7 +1452,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->Bind(&load_okay);
// load exception path
SwitchToFarCode();
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (-result << 2) | BD | cop_n
m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg));
@ -1353,37 +1464,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
SwitchToNearCode();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
else
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (size)
{
case RegSize_8:
@ -1402,27 +1490,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
UnreachableCode();
break;
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
break;
default:
UnreachableCode();
break;
}
return result;
}
}
@ -1443,11 +1510,87 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
if (g_settings.IsUsingFastmem())
{
// we need the value in a host register to store it
Value value_in_hr = GetValueInHostRegister(value);
EmitStoreGuestMemoryFastmem(cbi, address, value_in_hr);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
}
}
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH));
bpi.host_pc = GetCurrentNearCodePointer();
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
}
switch (value.size)
{
case RegSize_8:
m_emit->Strb(GetHostReg8(value), actual_address);
break;
case RegSize_16:
m_emit->Strh(GetHostReg16(value), actual_address);
break;
case RegSize_32:
m_emit->Str(GetHostReg32(value), actual_address);
break;
default:
UnreachableCode();
break;
}
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value, bool in_far_code)
{
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
Assert(!in_far_code);
Value result = m_register_cache.AllocateScratch(RegSize_32);
switch (value.size)
{
case RegSize_8:
@ -1475,7 +1618,8 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->Bind(&store_okay);
// store exception path
SwitchToFarCode();
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (result << 2) | BD | cop_n
m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
@ -1484,15 +1628,14 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
if (!in_far_code)
EmitExceptionExit();
SwitchToNearCode();
m_register_cache.PopState();
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (value.size)
{
case RegSize_8:
@ -1514,6 +1657,30 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
}
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
// check jump distance
const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.host_slowmem_pc) - reinterpret_cast<intptr_t>(lbi.host_pc));
Assert(Common::IsAligned(jump_distance, 4));
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
// turn it into a jump to the slowmem handler
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size,
a64::PositionDependentCode);
emit.b(jump_distance >> 2);
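// pad the rest of the patched region with nops so the block layout and the
// recorded host_code_size stay unchanged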
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
emit.nop();
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
return true;
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
EmitLoadGlobalAddress(RSCRATCH, ptr);

View File

@ -1,4 +1,5 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
@ -12,6 +13,7 @@ namespace CPU::Recompiler {
#if defined(ABI_WIN64)
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
constexpr HostReg RARG1 = Xbyak::Operand::RCX;
constexpr HostReg RARG2 = Xbyak::Operand::RDX;
@ -21,6 +23,7 @@ constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16;
#elif defined(ABI_SYSV)
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
constexpr HostReg RARG1 = Xbyak::Operand::RDI;
constexpr HostReg RARG2 = Xbyak::Operand::RSI;
@ -79,6 +82,11 @@ static const Xbyak::Reg64 GetCPUPtrReg()
return GetHostReg64(RCPUPTR);
}
static const Xbyak::Reg64 GetFastmemBasePtrReg()
{
return GetHostReg64(RMEMBASEPTR);
}
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
@ -140,7 +148,6 @@ void CodeGenerator::InitHostRegs()
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
#elif defined(ABI_SYSV)
m_register_cache.SetHostRegAllocationOrder(
{Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
@ -154,8 +161,9 @@ void CodeGenerator::InitHostRegs()
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
Xbyak::Operand::R15});
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
#endif
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
}
void CodeGenerator::SwitchToFarCode()
@ -196,11 +204,22 @@ void CodeGenerator::EmitBeginBlock()
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// If there's loadstore instructions, preload the fastmem base.
if (m_block->contains_loadstore_instructions)
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
}
}
void CodeGenerator::EmitEndBlock()
{
m_register_cache.FreeHostReg(RCPUPTR);
if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
@ -1762,12 +1781,139 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
Value result = m_register_cache.AllocateScratch(RegSize_64);
if (g_settings.IsUsingFastmem())
{
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
// can't encode displacements >= 0x80000000 inline
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
bpi.host_pc = GetCurrentNearCodePointer();
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
}
// TODO: BIOS reads...
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
// insert nops; we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
for (u32 i = 0; i < nops; i++)
m_emit->nop();
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
// return to the block code
m_emit->jmp(GetCurrentNearCodePointer());
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
// NOTE: This can leave junk in the upper bits
switch (size)
{
@ -1794,7 +1940,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
m_register_cache.PushState();
// load exception path
SwitchToFarCode();
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (-result << 2) | BD | cop_n
m_emit->neg(GetHostReg32(result.host_reg));
@ -1805,37 +1952,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
SwitchToNearCode();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
else
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (size)
{
case RegSize_8:
@ -1854,27 +1978,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
UnreachableCode();
break;
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
break;
default:
UnreachableCode();
break;
}
return result;
}
}
@ -1895,11 +1998,163 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
AddPendingCycles(true);
if (g_settings.IsUsingFastmem())
{
EmitStoreGuestMemoryFastmem(cbi, address, value);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
}
}
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
// can't encode displacements >= 0x80000000 inline
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
bpi.host_pc = GetCurrentNearCodePointer();
}
switch (value.size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg8(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg8(value.host_reg));
}
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg16(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg16(value.host_reg));
}
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg32(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg32(value.host_reg));
}
}
}
break;
}
// insert nops; we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
for (u32 i = 0; i < nops; i++)
m_emit->nop();
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
// return to the block code
m_emit->jmp(GetCurrentNearCodePointer());
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
Assert(!in_far_code);
Value result = m_register_cache.AllocateScratch(RegSize_32);
switch (value.size)
{
case RegSize_8:
@ -1925,24 +2180,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->jnz(GetCurrentFarCodePointer());
// store exception path
SwitchToFarCode();
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (result << 2) | BD | cop_n
m_emit->shl(GetHostReg32(result.host_reg), 2);
m_emit->or_(GetHostReg32(result.host_reg),
m_emit->shl(GetHostReg32(result), 2);
m_emit->or_(GetHostReg32(result),
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
cbi.instruction.cop.cop_n));
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
SwitchToNearCode();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (value.size)
{
case RegSize_8:
@ -1964,6 +2219,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
}
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
// turn it into a jump to the slowmem handler
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
cg.jmp(lbi.host_slowmem_pc);
const s32 nops = static_cast<s32>(lbi.host_code_size) -
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(lbi.host_pc)));
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
cg.nop();
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
return true;
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
const s64 displacement =

View File

@ -32,6 +32,7 @@ void UncheckedWriteMemoryByte(u32 address, u8 value);
void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
void UncheckedWriteMemoryWord(u32 address, u32 value);
void UpdateFastmemMapping();
} // namespace Recompiler::Thunks

View File

@ -127,6 +127,16 @@ constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = false;
#endif
struct LoadStoreBackpatchInfo
{
void* host_pc; // pointer to instruction which will fault
void* host_slowmem_pc; // pointer to slowmem callback code
u32 host_code_size; // size of the fastmem load as well as the add for cycles
HostReg address_host_reg; // register containing the guest address to load/store
HostReg value_host_reg; // register containing the source/destination
PhysicalMemoryAddress guest_pc;
};
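// For illustration (not part of this commit): the code generator fills
// host_pc and host_code_size around the emitted fastmem access, and
// host_slowmem_pc with the far-code fallback; on a page fault,
// PageFaultHandler() finds the record by host_pc and BackpatchLoadStore()
// rewrites the fastmem code into a jump to host_slowmem_pc.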
} // namespace Recompiler
} // namespace CPU

View File

@ -366,6 +366,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
si.SetBoolValue("CPU", "ICache", false);
si.SetBoolValue("CPU", "Fastmem", true);
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
si.SetIntValue("GPU", "ResolutionScale", 1);
@ -512,12 +513,13 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
if (g_settings.emulation_speed != old_settings.emulation_speed)
System::UpdateThrottlePeriod();
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode)
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode ||
g_settings.cpu_fastmem != old_settings.cpu_fastmem)
{
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
CPU::CodeCache::Flush();
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode%s.",
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode),
g_settings.cpu_fastmem ? " (fastmem)" : "");
CPU::CodeCache::Reinitialize();
CPU::ClearICache();
}

View File

@ -96,6 +96,7 @@ void Settings::Load(SettingsInterface& si)
.value_or(DEFAULT_CPU_EXECUTION_MODE);
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true);
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER);
@ -217,6 +218,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
si.SetBoolValue("CPU", "Fastmem", cpu_fastmem);
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());

View File

@ -72,6 +72,7 @@ struct Settings
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false;
bool cpu_fastmem = true;
float emulation_speed = 1.0f;
bool speed_limiter_enabled = true;
@ -172,6 +173,11 @@ struct Settings
return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled;
}
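/// Returns true if fastmem can be used: it requires the recompiler, and is
/// incompatible with recompiler memory exceptions.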
ALWAYS_INLINE bool IsUsingFastmem() const
{
return (cpu_fastmem && cpu_execution_mode == CPUExecutionMode::Recompiler && !cpu_recompiler_memory_exceptions);
}
bool HasAnyPerGameMemoryCards() const;
enum : u32

View File

@ -708,14 +708,16 @@ bool Initialize(bool force_software_renderer)
TimingEvents::Initialize();
CPU::Initialize();
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
Bus::Initialize();
if (!Bus::Initialize())
return false;
CPU::CodeCache::Initialize();
if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer))
return false;
g_dma.Initialize();
g_interrupt_controller.Initialize();
g_cdrom.Initialize();

View File

@ -129,6 +129,6 @@ enum : u32
enum : u32
{
CPU_CODE_CACHE_PAGE_SIZE = 1024,
CPU_CODE_CACHE_PAGE_SIZE = 4096,
CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
};