Merge branch 'fastmem2' into threaded-gpu

This commit is contained in:
Connor McLaughlin 2020-09-20 22:11:02 +10:00
commit 9d49ed6299
30 changed files with 2203 additions and 284 deletions

View File

@ -56,6 +56,10 @@ add_library(common
minizip_helpers.h
null_audio_stream.cpp
null_audio_stream.h
memory_arena.cpp
memory_arena.h
page_fault_handler.cpp
page_fault_handler.h
rectangle.h
progress_callback.cpp
progress_callback.h
@ -180,3 +184,8 @@ if(APPLE AND NOT BUILD_LIBRETRO_CORE)
gl/context_agl.h
)
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
# We need -lrt for shm_unlink
target_link_libraries(common PRIVATE rt)
endif()

View File

@ -70,6 +70,8 @@
<ClInclude Include="md5_digest.h" />
<ClInclude Include="null_audio_stream.h" />
<ClInclude Include="progress_callback.h" />
<ClInclude Include="memory_arena.h" />
<ClInclude Include="page_fault_handler.h" />
<ClInclude Include="rectangle.h" />
<ClInclude Include="cd_subchannel_replacement.h" />
<ClInclude Include="scope_guard.h" />
@ -130,6 +132,8 @@
<ClCompile Include="null_audio_stream.cpp" />
<ClCompile Include="progress_callback.cpp" />
<ClCompile Include="shiftjis.cpp" />
<ClCompile Include="memory_arena.cpp" />
<ClCompile Include="page_fault_handler.cpp" />
<ClCompile Include="state_wrapper.cpp" />
<ClCompile Include="cd_xa.cpp" />
<ClCompile Include="string.cpp" />

View File

@ -103,6 +103,8 @@
<ClInclude Include="win32_progress_callback.h" />
<ClInclude Include="make_array.h" />
<ClInclude Include="shiftjis.h" />
<ClInclude Include="memory_arena.h" />
<ClInclude Include="page_fault_handler.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="jit_code_buffer.cpp" />
@ -198,6 +200,8 @@
<ClCompile Include="minizip_helpers.cpp" />
<ClCompile Include="win32_progress_callback.cpp" />
<ClCompile Include="shiftjis.cpp" />
<ClCompile Include="memory_arena.cpp" />
<ClCompile Include="page_fault_handler.cpp" />
</ItemGroup>
<ItemGroup>
<Natvis Include="bitfield.natvis" />

213
src/common/memory_arena.cpp Normal file
View File

@ -0,0 +1,213 @@
#include "memory_arena.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/string_util.h"
Log_SetChannel(Common::MemoryArena);
#if defined(WIN32)
#include "common/windows_headers.h"
#elif defined(__linux__) || defined(__ANDROID__)
#include <cerrno>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
namespace Common {
MemoryArena::MemoryArena() = default;
/// Closes the OS object that backs the arena. Views handed out by the arena are not
/// unmapped here; they are released through ReleaseViewPtr() or View's destructor.
MemoryArena::~MemoryArena()
{
#if defined(WIN32)
  if (m_file_handle)
    CloseHandle(m_file_handle);
#elif defined(__linux__)
  // -1 is the "never created" sentinel. Descriptor 0 is a perfectly valid result of
  // shm_open() (e.g. when stdin has been closed), so it must be closed as well.
  if (m_shmem_fd >= 0)
    close(m_shmem_fd);
#endif
}
void* MemoryArena::FindBaseAddressForMapping(size_t size)
{
void* base_address;
#if defined(WIN32)
base_address = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);
if (base_address)
VirtualFree(base_address, 0, MEM_RELEASE);
#elif defined(__linux__)
base_address = mmap(nullptr, size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
if (base_address)
munmap(base_address, size);
#elif defined(__ANDROID__)
base_address = mmap(nullptr, size, PROT_NONE, MAP_ANON | MAP_SHARED, -1, 0);
if (base_address)
munmap(base_address, size);
#else
base_address = nullptr;
#endif
if (!base_address)
{
Log_ErrorPrintf("Failed to get base address for memory mapping of size %zu", size);
return nullptr;
}
return base_address;
}
bool MemoryArena::Create(size_t size, bool writable, bool executable)
{
#if defined(WIN32)
const std::string file_mapping_name =
StringUtil::StdStringFromFormat("common_memory_arena_%zu_%u", size, GetCurrentProcessId());
const DWORD protect = (writable ? (executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE) : PAGE_READONLY);
m_file_handle = CreateFileMappingA(INVALID_HANDLE_VALUE, nullptr, protect, Truncate32(size >> 32), Truncate32(size),
file_mapping_name.c_str());
if (!m_file_handle)
{
Log_ErrorPrintf("CreateFileMapping failed: %u", GetLastError());
return false;
}
return true;
#elif defined(__linux__)
const std::string file_mapping_name =
StringUtil::StdStringFromFormat("common_memory_arena_%zu_%u", size, static_cast<unsigned>(getpid()));
m_shmem_fd = shm_open(file_mapping_name.c_str(), O_CREAT | O_EXCL | (writable ? O_RDWR : O_RDONLY), 0600);
if (m_shmem_fd < 0)
{
Log_ErrorPrintf("shm_open failed: %d", errno);
return false;
}
// we're not going to be opening this mapping in other processes, so remove the file
shm_unlink(file_mapping_name.c_str());
// ensure it's the correct size
if (ftruncate64(m_shmem_fd, static_cast<off64_t>(size)) < 0)
{
Log_ErrorPrintf("ftruncate64(%zu) failed: %d", size, errno);
return false;
}
return true;
#else
return false;
#endif
}
/// Maps part of the arena and wraps the mapping in a View that unmaps it on destruction.
///
/// @return The owning view, or std::nullopt when CreateViewPtr() could not map the range.
std::optional<MemoryArena::View> MemoryArena::CreateView(size_t offset, size_t size, bool writable, bool executable,
                                                         void* fixed_address)
{
  if (void* const mapped = CreateViewPtr(offset, size, writable, executable, fixed_address))
    return View(this, mapped, offset, size, writable);

  return std::nullopt;
}
void* MemoryArena::CreateViewPtr(size_t offset, size_t size, bool writable, bool executable,
void* fixed_address /*= nullptr*/)
{
void* base_pointer;
#if defined(WIN32)
const DWORD desired_access = FILE_MAP_READ | (writable ? FILE_MAP_WRITE : 0) | (executable ? FILE_MAP_EXECUTE : 0);
base_pointer =
MapViewOfFileEx(m_file_handle, desired_access, Truncate32(offset >> 32), Truncate32(offset), size, fixed_address);
if (!base_pointer)
return nullptr;
#elif defined(__linux__)
const int flags = (fixed_address != nullptr) ? (MAP_SHARED | MAP_FIXED) : MAP_SHARED;
const int prot = PROT_READ | (writable ? PROT_WRITE : 0) | (executable ? PROT_EXEC : 0);
base_pointer = mmap64(fixed_address, size, prot, flags, m_shmem_fd, static_cast<off64_t>(offset));
if (base_pointer == reinterpret_cast<void*>(-1))
return nullptr;
#else
return nullptr;
#endif
m_num_views.fetch_add(1);
return base_pointer;
}
bool MemoryArena::FlushViewPtr(void* address, size_t size)
{
#if defined(WIN32)
return FlushViewOfFile(address, size);
#elif defined(__linux__)
return (msync(address, size, 0) >= 0);
#else
return false;
#endif
}
/// Unmaps a view previously returned by CreateViewPtr() and drops it from the view count.
///
/// @param address Start of the view.
/// @param size    Length of the view in bytes (needed by munmap; unused on Windows).
/// @return true if the view was unmapped; false (after logging) otherwise.
bool MemoryArena::ReleaseViewPtr(void* address, size_t size)
{
#if defined(WIN32)
  const bool unmapped = static_cast<bool>(UnmapViewOfFile(address));
#elif defined(__linux__)
  const bool unmapped = (munmap(address, size) >= 0);
#else
  const bool unmapped = false;
#endif

  if (unmapped)
  {
    // The counter must have been non-zero, otherwise a view was released twice.
    const size_t views_before = m_num_views.fetch_sub(1);
    Assert(views_before > 0);
    return true;
  }

  Log_ErrorPrintf("Failed to unmap previously-created view at %p", address);
  return false;
}
bool MemoryArena::SetPageProtection(void* address, size_t length, bool readable, bool writable, bool executable)
{
#if defined(WIN32)
static constexpr DWORD protection_table[2][2][2] = {
{{PAGE_NOACCESS, PAGE_EXECUTE}, {PAGE_WRITECOPY, PAGE_EXECUTE_WRITECOPY}},
{{PAGE_READONLY, PAGE_EXECUTE_READ}, {PAGE_READWRITE, PAGE_EXECUTE_READWRITE}}};
DWORD old_protect;
return static_cast<bool>(
VirtualProtect(address, length, protection_table[readable][writable][executable], &old_protect));
#elif defined(__linux__) || defined(__ANDROID__)
const int prot = (readable ? PROT_READ : 0) | (writable ? PROT_WRITE : 0) | (executable ? PROT_EXEC : 0);
return (mprotect(address, length, prot) >= 0);
#else
return false;
#endif
}
/// Wraps an existing mapping so that it is flushed (if writable) and unmapped through
/// `parent` when the View is destroyed.
MemoryArena::View::View(MemoryArena* parent, void* base_pointer, size_t arena_offset, size_t mapping_size,
                        bool writable)
  : m_parent(parent),
    m_base_pointer(base_pointer),
    m_arena_offset(arena_offset),
    m_mapping_size(mapping_size),
    m_writable(writable)
{
  // Nothing else to do: the mapping itself was already created by the caller.
}
/// Move constructor: takes over the mapping described by `view` and leaves `view` empty so
/// that its destructor does nothing.
///
/// Every member is transferred, including the writable flag. The destructor consults
/// m_writable to decide whether to flush, so leaving it uninitialised would make that
/// decision depend on an indeterminate value.
MemoryArena::View::View(View&& view)
  : m_parent(view.m_parent), m_base_pointer(view.m_base_pointer), m_arena_offset(view.m_arena_offset),
    m_mapping_size(view.m_mapping_size), m_writable(view.m_writable)
{
  // A null parent marks the source as moved-from; the remaining fields are cleared for tidiness.
  view.m_parent = nullptr;
  view.m_base_pointer = nullptr;
  view.m_arena_offset = 0;
  view.m_mapping_size = 0;
  view.m_writable = false;
}
/// Flushes (writable views only) and unmaps the mapping. A View without a parent, i.e. one
/// that has been moved from, owns nothing and is skipped. Either step failing is fatal.
MemoryArena::View::~View()
{
  if (!m_parent)
    return;

  if (m_writable)
  {
    const bool flushed = m_parent->FlushViewPtr(m_base_pointer, m_mapping_size);
    if (!flushed)
      Panic("Failed to flush previously-created view");
  }

  const bool released = m_parent->ReleaseViewPtr(m_base_pointer, m_mapping_size);
  if (!released)
    Panic("Failed to unmap previously-created view");
}
} // namespace Common

58
src/common/memory_arena.h Normal file
View File

@ -0,0 +1,58 @@
#pragma once
#include "types.h"
#include <atomic>
#include <optional>
namespace Common {
/// A block of shareable memory from which any number of views (mappings) can be created,
/// optionally at caller-chosen addresses. The same arena offset may be mapped several times.
class MemoryArena
{
public:
  /// Move-only owner of a single mapping; the mapping is flushed (if writable) and released
  /// through the parent arena when the View is destroyed.
  class View
  {
  public:
    View(MemoryArena* parent, void* base_pointer, size_t arena_offset, size_t mapping_size, bool writable);
    View(View&& view);
    ~View();

    /// Address the mapping starts at.
    void* GetBasePointer() const { return m_base_pointer; }
    /// Offset within the arena that the mapping starts at.
    size_t GetArenaOffset() const { return m_arena_offset; }
    /// Length of the mapping in bytes.
    size_t GetMappingSize() const { return m_mapping_size; }
    /// Whether the view was created writable.
    bool IsWritable() const { return m_writable; }

  private:
    // Every member carries a default so that a constructor which forgets to initialise one
    // (the destructor reads m_parent and m_writable) can never observe an indeterminate value.
    MemoryArena* m_parent = nullptr; // null means "owns nothing" (moved-from)
    void* m_base_pointer = nullptr;
    size_t m_arena_offset = 0;
    size_t m_mapping_size = 0;
    bool m_writable = false;
  };

  MemoryArena();
  ~MemoryArena();

  /// Probes for a free range of address space of the given size; returns nullptr on failure.
  static void* FindBaseAddressForMapping(size_t size);

  /// Creates the backing object of the arena. Returns false on failure.
  bool Create(size_t size, bool writable, bool executable);

  /// Maps part of the arena and returns an owning View, or std::nullopt on failure.
  std::optional<View> CreateView(size_t offset, size_t size, bool writable, bool executable,
                                 void* fixed_address = nullptr);

  /// Maps part of the arena and returns the raw address (nullptr on failure). The caller is
  /// responsible for passing the result to ReleaseViewPtr().
  void* CreateViewPtr(size_t offset, size_t size, bool writable, bool executable, void* fixed_address = nullptr);

  /// Writes modified pages of a view back to the arena.
  bool FlushViewPtr(void* address, size_t size);

  /// Unmaps a view created with CreateViewPtr().
  bool ReleaseViewPtr(void* address, size_t size);

  /// Changes the access rights of a range of mapped pages.
  static bool SetPageProtection(void* address, size_t length, bool readable, bool writable, bool executable);

private:
#if defined(WIN32)
  void* m_file_handle = nullptr;
#elif defined(__linux__)
  int m_shmem_fd = -1;
#endif

  /// Number of views currently mapped through CreateViewPtr().
  std::atomic_size_t m_num_views{0};
  size_t m_size = 0;
  bool m_writable = false;
  bool m_executable = false;
};
} // namespace Common

View File

@ -0,0 +1,186 @@
#include "page_fault_handler.h"
#include "common/log.h"
#include <algorithm>
#include <cstring>
#include <mutex>
#include <vector>
Log_SetChannel(Common::PageFaultHandler);
#if defined(WIN32)
#include "common/windows_headers.h"
#elif defined(__linux__) || defined(__ANDROID__)
#include <signal.h>
#include <ucontext.h>
#include <unistd.h>
#define USE_SIGSEGV 1
#endif
namespace Common::PageFaultHandler {
/// A fault callback paired with the opaque key it was registered under.
struct RegisteredHandler
{
// Identity key only: InstallHandler()/RemoveHandler() compare it by pointer value.
void* owner;
// Called by the platform fault handler with the faulting PC, the faulting address and a write flag.
Callback callback;
};
// Registered callbacks in registration order; the fault handlers walk the list front to back.
static std::vector<RegisteredHandler> m_handlers;
// Held around every access to m_handlers, including the walk inside the fault handlers.
static std::mutex m_handler_lock;
// Per-thread flag tested on entry to the fault handlers to refuse nested invocations.
static thread_local bool s_in_handler;
#if defined(WIN32)
// Registration cookie returned by AddVectoredExceptionHandler().
static PVOID s_veh_handle;

/// Vectored exception handler: offers access violations to the registered callbacks in
/// order and resumes the faulting thread as soon as one of them claims the fault.
static LONG ExceptionHandler(PEXCEPTION_POINTERS exi)
{
  const bool is_access_violation = (exi->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION);
  if (!is_access_violation || s_in_handler)
    return EXCEPTION_CONTINUE_SEARCH;

  s_in_handler = true;

  const auto& record = *exi->ExceptionRecord;
  void* const fault_pc = reinterpret_cast<void*>(exi->ContextRecord->Rip);
  void* const fault_address = reinterpret_cast<void*>(record.ExceptionInformation[1]);
  const bool write_access = (record.ExceptionInformation[0] == 1);

  LONG disposition = EXCEPTION_CONTINUE_SEARCH;
  {
    std::lock_guard<std::mutex> guard(m_handler_lock);
    for (const RegisteredHandler& entry : m_handlers)
    {
      if (entry.callback(fault_pc, fault_address, write_access) == HandlerResult::ContinueExecution)
      {
        disposition = EXCEPTION_CONTINUE_EXECUTION;
        break;
      }
    }

    // Cleared while the lock is still held, on both the handled and the unhandled path.
    s_in_handler = false;
  }

  return disposition;
}
#elif defined(USE_SIGSEGV)
static struct sigaction s_old_sigsegv_action;
static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
{
if ((info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR) || s_in_handler)
return;
void* const exception_address = reinterpret_cast<void*>(info->si_addr);
#if defined(__x86_64__)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_ERR] & 2) != 0;
#elif defined(__aarch64__)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
const bool is_write = false;
#else
void* const exception_pc = nullptr;
const bool is_write = false;
#endif
std::lock_guard<std::mutex> guard(m_handler_lock);
for (const RegisteredHandler& rh : m_handlers)
{
if (rh.callback(exception_pc, exception_address, is_write) == HandlerResult::ContinueExecution)
{
s_in_handler = false;
return;
}
}
// call old signal handler
if (s_old_sigsegv_action.sa_flags & SA_SIGINFO)
s_old_sigsegv_action.sa_sigaction(sig, info, ctx);
else if (s_old_sigsegv_action.sa_handler == SIG_DFL)
signal(sig, SIG_DFL);
else if (s_old_sigsegv_action.sa_handler == SIG_IGN)
return;
else
s_old_sigsegv_action.sa_handler(sig);
}
#endif
/// Registers `callback` to be offered page faults, installing the process-wide OS fault
/// hook when this is the first registration.
///
/// The whole operation runs under the handler lock and the callback is only added once the
/// OS hook is known to be in place. Previously a failed hook installation left the callback
/// registered, so every later call saw a non-empty list, skipped the installation and
/// reported success even though no hook existed.
///
/// @param owner    Opaque key identifying the registration; at most one callback per owner.
/// @param callback Function invoked for each fault.
/// @return true if the callback is registered; false if `owner` is already registered, the
///         OS hook could not be installed, or the platform is unsupported.
bool InstallHandler(void* owner, Callback callback)
{
  std::lock_guard<std::mutex> guard(m_handler_lock);

  if (std::find_if(m_handlers.begin(), m_handlers.end(),
                   [owner](const RegisteredHandler& rh) { return rh.owner == owner; }) != m_handlers.end())
  {
    return false;
  }

  if (m_handlers.empty())
  {
#if defined(WIN32)
    s_veh_handle = AddVectoredExceptionHandler(1, ExceptionHandler);
    if (!s_veh_handle)
    {
      Log_ErrorPrint("Failed to add vectored exception handler");
      return false;
    }
#elif defined(USE_SIGSEGV)
#if 0
    // TODO: Is this needed?
    stack_t signal_stack = {};
    signal_stack.ss_sp = malloc(SIGSTKSZ);
    signal_stack.ss_size = SIGSTKSZ;
    if (sigaltstack(&signal_stack, nullptr))
    {
      Log_ErrorPrintf("signaltstack() failed: %d", errno);
      return false;
    }
#endif

    struct sigaction sa = {};
    sa.sa_sigaction = SIGSEGVHandler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGSEGV, &sa, &s_old_sigsegv_action) < 0)
    {
      Log_ErrorPrintf("sigaction() failed: %d", errno);
      return false;
    }
#else
    // No fault hook available on this platform.
    return false;
#endif
  }

  m_handlers.push_back(RegisteredHandler{owner, std::move(callback)});
  return true;
}
/// Unregisters the callback that was installed for `owner`, removing the OS fault hook
/// again once no callbacks remain.
///
/// @param owner Key that was passed to InstallHandler().
/// @return false if `owner` was not registered or the previous SIGSEGV disposition could
///         not be restored (the callback is removed in that case regardless); true otherwise.
bool RemoveHandler(void* owner)
{
  std::lock_guard<std::mutex> guard(m_handler_lock);
  auto it = std::find_if(m_handlers.begin(), m_handlers.end(),
                         [owner](const RegisteredHandler& rh) { return rh.owner == owner; });
  if (it == m_handlers.end())
    return false;

  m_handlers.erase(it);

  if (m_handlers.empty())
  {
#if defined(WIN32)
    RemoveVectoredExceptionHandler(s_veh_handle);
    s_veh_handle = nullptr;
#elif defined(USE_SIGSEGV)
    // Guarded like the install path: s_old_sigsegv_action and the signal headers only exist
    // when USE_SIGSEGV is defined, so a bare #else would not compile on other platforms.
    // restore old signal handler
    if (sigaction(SIGSEGV, &s_old_sigsegv_action, nullptr) < 0)
    {
      Log_ErrorPrintf("sigaction() failed: %d", errno);
      return false;
    }

    s_old_sigsegv_action = {};
#endif
  }

  return true;
}
} // namespace Common::PageFaultHandler

View File

@ -0,0 +1,17 @@
#pragma once
#include "types.h"
namespace Common::PageFaultHandler {
/// Outcome reported by a page fault callback.
enum class HandlerResult
{
// The callback dealt with the fault: no further callbacks are tried and the faulting thread resumes.
ContinueExecution,
// The callback did not deal with the fault: the next registered callback is tried.
ExecuteNextHandler,
};
/// Signature of a page fault callback: receives the faulting instruction address, the
/// address whose access faulted, and whether the access was a write.
using Callback = HandlerResult(*)(void* exception_pc, void* fault_address, bool is_write);
// NOTE(review): not referenced by the two functions declared below - presumably reserved; confirm.
using Handle = void*;
/// Registers `callback` under the key `owner`; returns false when `owner` is already
/// registered or the handler could not be installed.
bool InstallHandler(void* owner, Callback callback);
/// Unregisters the callback registered under `owner`; returns false when there is none.
bool RemoveHandler(void* owner);
} // namespace Common::PageFaultHandler

View File

@ -10,6 +10,7 @@
#include "cpu_disasm.h"
#include "dma.h"
#include "gpu.h"
#include "host_interface.h"
#include "interrupt_controller.h"
#include "mdec.h"
#include "pad.h"
@ -69,8 +70,9 @@ union MEMCTRL
};
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
u8 g_ram[RAM_SIZE]{}; // 2MB RAM
u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
u8* g_ram = nullptr; // 2MB RAM
u8* g_bios = nullptr; // 512K BIOS ROM
u8* g_scratchpad = nullptr;
static std::array<TickCount, 3> m_exp1_access_time = {};
static std::array<TickCount, 3> m_exp2_access_time = {};
@ -85,9 +87,19 @@ static u32 m_ram_size_reg = 0;
static std::string m_tty_line_buffer;
static Common::MemoryArena m_memory_arena;
static u8* m_fastmem_base = nullptr;
static std::vector<Common::MemoryArena::View> m_fastmem_ram_views;
static std::vector<Common::MemoryArena::View> m_fastmem_scratchpad_views;
static std::vector<Common::MemoryArena::View> m_fastmem_bios_views;
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
static void RecalculateMemoryTimings();
static void SetCodePageFastmemProtection(u32 page_index, bool writable);
static bool AllocateMemory();
static void UnmapFastmemViews();
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u))
#define FIXUP_HALFWORD_READ_OFFSET(offset) ((offset) & ~u32(1))
@ -103,19 +115,35 @@ ALWAYS_INLINE static void FixupUnalignedWordAccessW32(u32& offset, u32& value)
value <<= byte_offset * 8;
}
void Initialize()
bool Initialize()
{
if (!AllocateMemory())
{
g_host_interface->ReportError("Failed to allocate memory");
return false;
}
Reset();
return true;
}
void Shutdown()
{
//
UnmapFastmemViews();
if (g_ram)
m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE);
if (g_bios)
m_memory_arena.ReleaseViewPtr(g_bios, BIOS_SIZE);
if (g_scratchpad)
m_memory_arena.ReleaseViewPtr(g_scratchpad, FASTMEM_SCRATCHPAD_SIZE);
CPU::g_state.fastmem_base = nullptr;
}
void Reset()
{
std::memset(g_ram, 0, sizeof(g_ram));
std::memset(g_ram, 0, RAM_SIZE);
std::memset(g_scratchpad, 0, SCRATCHPAD_SIZE);
m_MEMCTRL.exp1_base = 0x1F000000;
m_MEMCTRL.exp2_base = 0x1F802000;
m_MEMCTRL.exp1_delay_size.bits = 0x0013243F;
@ -137,8 +165,9 @@ bool DoState(StateWrapper& sw)
sw.Do(&m_bios_access_time);
sw.Do(&m_cdrom_access_time);
sw.Do(&m_spu_access_time);
sw.DoBytes(g_ram, sizeof(g_ram));
sw.DoBytes(g_bios, sizeof(g_bios));
sw.DoBytes(g_ram, RAM_SIZE);
sw.DoBytes(g_bios, BIOS_SIZE);
sw.DoBytes(g_scratchpad, SCRATCHPAD_SIZE);
sw.DoArray(m_MEMCTRL.regs, countof(m_MEMCTRL.regs));
sw.Do(&m_ram_size_reg);
sw.Do(&m_tty_line_buffer);
@ -217,6 +246,209 @@ void RecalculateMemoryTimings()
m_spu_access_time[2] + 1);
}
/// Creates the memory arena and maps the base (non-fastmem) views of RAM, BIOS and
/// scratchpad that the g_ram/g_bios/g_scratchpad pointers refer to.
///
/// @return true when the arena and all three views exist; false otherwise. Views that were
///         mapped before a failure are left for Shutdown() to release.
bool AllocateMemory()
{
  if (!m_memory_arena.Create(MEMORY_ARENA_SIZE, true, false))
  {
    Log_ErrorPrint("Failed to create memory arena");
    return false;
  }

  // Create the base views.
  g_ram = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false));
  g_bios = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, true, false));
  g_scratchpad = static_cast<u8*>(
    m_memory_arena.CreateViewPtr(MEMORY_ARENA_SCRATCHPAD_OFFSET, FASTMEM_SCRATCHPAD_SIZE, true, false));

  // The scratchpad view must be checked as well: Reset() clears it with memset right after
  // a successful allocation, which would write through a null pointer.
  if (!g_ram || !g_bios || !g_scratchpad)
  {
    Log_ErrorPrint("Failed to create base views of memory");
    return false;
  }

  return true;
}
void UnmapFastmemViews()
{
m_fastmem_ram_views.clear();
m_fastmem_scratchpad_views.clear();
m_fastmem_bios_views.clear();
}
void UpdateFastmemViews(bool enabled, bool isolate_cache)
{
UnmapFastmemViews();
if (!enabled)
{
m_fastmem_base = nullptr;
return;
}
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false");
if (!m_fastmem_base)
{
m_fastmem_base = static_cast<u8*>(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE));
if (!m_fastmem_base)
{
Log_ErrorPrint("Failed to find base address for fastmem");
return;
}
Log_InfoPrintf("Fastmem base: %p", m_fastmem_base);
CPU::g_state.fastmem_base = m_fastmem_base;
}
auto MapRAM = [](u32 base_address) {
u8* map_address = m_fastmem_base + base_address;
auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false, map_address);
if (!view)
{
Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
return;
}
// mark all pages with code as non-writable
for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++)
{
if (m_ram_code_bits[i])
{
u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE);
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false))
Log_ErrorPrintf("Failed to write-protect code page at %p");
}
}
m_fastmem_ram_views.push_back(std::move(view.value()));
};
auto MapScratchpad = [](u32 base_address) {
u8* map_address = m_fastmem_base + base_address;
auto view =
m_memory_arena.CreateView(MEMORY_ARENA_SCRATCHPAD_OFFSET, FASTMEM_SCRATCHPAD_SIZE, true, false, map_address);
if (!view)
{
Log_ErrorPrintf("Failed to map scratchpad at fastmem area %p (offset 0x%08X)", map_address,
FASTMEM_SCRATCHPAD_SIZE);
return;
}
// mark all pages beyond the first as inaccessible
// we need to do this because of windows's stupidity with its 64K mapping granularity
if (!m_memory_arena.SetPageProtection(map_address + CPU_CODE_CACHE_PAGE_SIZE,
FASTMEM_SCRATCHPAD_SIZE - CPU_CODE_CACHE_PAGE_SIZE, false, false, false))
{
Log_ErrorPrintf("Failed to read/write protect scratchpad");
}
m_fastmem_scratchpad_views.push_back(std::move(view.value()));
};
auto MapBIOS = [](u32 base_address) {
u8* map_address = m_fastmem_base + base_address;
auto view = m_memory_arena.CreateView(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, false, false, map_address);
if (!view)
{
Log_ErrorPrintf("Failed to map BIOS at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
return;
}
m_fastmem_bios_views.push_back(std::move(view.value()));
};
if (!isolate_cache)
{
// KUSEG - cached
MapRAM(0x00000000);
// MapScratchpad(0x1F800000);
// MapBIOS(0x1FC00000);
// KSEG0 - cached
MapRAM(0x80000000);
// MapScratchpad(0x9F800000);
// MapBIOS(0x9FC00000);
}
// KSEG1 - uncached
MapRAM(0xA0000000);
// MapBIOS(0xBFC00000);
}
/// Reports whether the RAM page with the given index is currently flagged as containing code.
bool IsRAMCodePage(u32 index)
{
  const bool flagged_as_code = m_ram_code_bits[index];
  return flagged_as_code;
}
/// Flags a RAM page as containing code and write-protects it in every fastmem RAM view.
/// Does nothing if the page is already flagged.
void SetRAMCodePage(u32 index)
{
  if (!m_ram_code_bits[index])
  {
    m_ram_code_bits[index] = true;

    // writable = false -> the page becomes read-only in fastmem
    SetCodePageFastmemProtection(index, false);
  }
}
/// Removes the code flag from a RAM page and makes it writable again in every fastmem RAM
/// view. Does nothing if the page is not flagged.
void ClearRAMCodePage(u32 index)
{
  if (m_ram_code_bits[index])
  {
    m_ram_code_bits[index] = false;

    // writable = true -> the page becomes read/write in fastmem again
    SetCodePageFastmemProtection(index, true);
  }
}
/// Applies read-only (writable == false) or read/write (writable == true) protection to one
/// code page in each of the fastmem RAM views. Failures are logged and otherwise ignored.
void SetCodePageFastmemProtection(u32 page_index, bool writable)
{
  const auto page_offset = page_index * CPU_CODE_CACHE_PAGE_SIZE;

  for (const auto& ram_view : m_fastmem_ram_views)
  {
    u8* const page_address = static_cast<u8*>(ram_view.GetBasePointer()) + page_offset;
    const bool applied =
      m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false);
    if (applied)
      continue;

    Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index,
                    page_offset, page_address);
  }
}
/// Clears the code flag of every RAM page and restores read/write access to the whole of
/// each fastmem RAM view. Failures are logged and otherwise ignored.
void ClearRAMCodePageFlags()
{
  m_ram_code_bits.reset();

  for (const auto& ram_view : m_fastmem_ram_views)
  {
    void* const view_base = ram_view.GetBasePointer();
    const bool unprotected =
      m_memory_arena.SetPageProtection(view_base, ram_view.GetMappingSize(), true, true, false);
    if (unprotected)
      continue;

    Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", ram_view.GetBasePointer());
  }
}
/// Reports whether `address` lies in RAM and within a page currently flagged as code.
bool IsCodePageAddress(PhysicalMemoryAddress address)
{
  if (!IsRAMAddress(address))
    return false;

  const u32 page_index = (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
  return m_ram_code_bits[page_index];
}
/// Reports whether any page touched by the byte range [start_address, start_address + size)
/// is flagged as code.
///
/// The scan walks page indices rather than advancing the start address by one page at a
/// time. With an unaligned start the old form compared the advanced (still unaligned)
/// address against the end and could stop before the final overlapped page: offset 0xFFF
/// with size 2 touches pages 0 and 1 but only page 0 was examined. It also indexed the page
/// bitset out of bounds when the range ran past the end of RAM.
///
/// @param start_address Physical start address; only RAM addresses are considered.
/// @param size          Length of the range in bytes; an empty range never matches.
/// @return true if at least one overlapped page is a code page.
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
{
  if (!IsRAMAddress(start_address) || size == 0)
    return false;

  const u32 ram_offset = (start_address & RAM_MASK);
  const u32 first_page = ram_offset / CPU_CODE_CACHE_PAGE_SIZE;

  // 64-bit arithmetic so offset + size cannot wrap around.
  const u64 last_byte = static_cast<u64>(ram_offset) + size - 1;
  u64 last_page = last_byte / CPU_CODE_CACHE_PAGE_SIZE;

  // Clamp ranges that extend beyond the end of RAM to the last tracked page.
  if (last_page >= CPU_CODE_CACHE_PAGE_COUNT)
    last_page = CPU_CODE_CACHE_PAGE_COUNT - 1;

  for (u32 page = first_page; page <= static_cast<u32>(last_page); page++)
  {
    if (m_ram_code_bits[page])
      return true;
  }

  return false;
}
static TickCount DoInvalidAccess(MemoryAccessType type, MemoryAccessSize size, PhysicalMemoryAddress address,
u32& value)
{
@ -288,7 +520,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value)
}
}
return (type == MemoryAccessType::Read) ? 4 : 0;
return (type == MemoryAccessType::Read) ? RAM_READ_TICKS : 0;
}
template<MemoryAccessType type, MemoryAccessSize size>
@ -753,7 +985,7 @@ ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void
{
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
if constexpr (add_ticks)
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
g_state.pending_ticks += (icache_read ? 1 : RAM_READ_TICKS) * word_count;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
@ -776,7 +1008,7 @@ TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
if (address < RAM_MIRROR_END)
{
return 4;
return RAM_READ_TICKS;
}
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
{
@ -894,34 +1126,36 @@ static void WriteCacheControl(u32 value)
template<MemoryAccessType type, MemoryAccessSize size>
ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address, u32& value)
{
using namespace Bus;
const PhysicalMemoryAddress cache_offset = address & DCACHE_OFFSET_MASK;
if constexpr (size == MemoryAccessSize::Byte)
{
if constexpr (type == MemoryAccessType::Read)
value = ZeroExtend32(g_state.dcache[cache_offset]);
value = ZeroExtend32(g_scratchpad[cache_offset]);
else
g_state.dcache[cache_offset] = Truncate8(value);
g_scratchpad[cache_offset] = Truncate8(value);
}
else if constexpr (size == MemoryAccessSize::HalfWord)
{
if constexpr (type == MemoryAccessType::Read)
{
u16 temp;
std::memcpy(&temp, &g_state.dcache[cache_offset], sizeof(temp));
std::memcpy(&temp, &g_scratchpad[cache_offset], sizeof(temp));
value = ZeroExtend32(temp);
}
else
{
u16 temp = Truncate16(value);
std::memcpy(&g_state.dcache[cache_offset], &temp, sizeof(temp));
std::memcpy(&g_scratchpad[cache_offset], &temp, sizeof(temp));
}
}
else if constexpr (size == MemoryAccessSize::Word)
{
if constexpr (type == MemoryAccessType::Read)
std::memcpy(&value, &g_state.dcache[cache_offset], sizeof(value));
std::memcpy(&value, &g_scratchpad[cache_offset], sizeof(value));
else
std::memcpy(&g_state.dcache[cache_offset], &value, sizeof(value));
std::memcpy(&g_scratchpad[cache_offset], &value, sizeof(value));
}
return 0;
@ -1307,6 +1541,64 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value)
return DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(addr, value) >= 0;
}
/// Resolves a guest virtual address to a host pointer that can be read from directly.
///
/// Only addresses whose top three bits are 0, 4 or 5 are considered, and only when they
/// fall in RAM, the scratchpad or the BIOS.
///
/// @param address    Guest virtual address.
/// @param size       Access size; selects the BIOS access time that is reported.
/// @param read_ticks Optional out-parameter receiving the tick cost of the read.
/// @return Host pointer to the addressed byte, or nullptr when there is no direct mapping.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks)
{
  using namespace Bus;

  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress phys = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  const bool in_ram = (phys < RAM_MIRROR_END);
  if (in_ram)
  {
    if (read_ticks)
      *read_ticks = RAM_READ_TICKS;

    return &g_ram[phys & RAM_MASK];
  }

  const bool in_scratchpad = ((phys & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  if (in_scratchpad)
  {
    if (read_ticks)
      *read_ticks = 0;

    return &g_scratchpad[phys & DCACHE_OFFSET_MASK];
  }

  const bool in_bios = (phys >= BIOS_BASE && phys < (BIOS_BASE + BIOS_SIZE));
  if (in_bios)
  {
    if (read_ticks)
      *read_ticks = m_bios_access_time[static_cast<u32>(size)];

    return &g_bios[phys & BIOS_MASK];
  }

  return nullptr;
}
/// Resolves a guest virtual address to a host pointer that can be written to directly.
///
/// Only the scratchpad qualifies at present; the RAM path is compiled out.
///
/// @param address Guest virtual address.
/// @param size    Access size (not consulted by the current implementation).
/// @return Host pointer to the addressed byte, or nullptr when there is no direct mapping.
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size)
{
  using namespace Bus;

  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK;

#if 0
  // Not enabled until we can protect code regions.
  if (paddr < RAM_MIRROR_END)
    return &g_ram[paddr & RAM_MASK];
#endif

  const bool in_scratchpad = ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  if (!in_scratchpad)
    return nullptr;

  return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
}
namespace Recompiler::Thunks {
u64 ReadMemoryByte(u32 address)

View File

@ -1,6 +1,6 @@
#pragma once
#include "common/bitfield.h"
#include "cpu_code_cache.h"
#include "common/memory_arena.h"
#include "types.h"
#include <array>
#include <bitset>
@ -20,6 +20,9 @@ enum : u32
EXP1_BASE = 0x1F000000,
EXP1_SIZE = 0x800000,
EXP1_MASK = EXP1_SIZE - 1,
SCRATCHPAD_BASE = 0x1F800000,
SCRATCHPAD_SIZE = 0x400,
SCRATCHPAD_MASK = SCRATCHPAD_SIZE - 1,
MEMCTRL_BASE = 0x1F801000,
MEMCTRL_SIZE = 0x40,
MEMCTRL_MASK = MEMCTRL_SIZE - 1,
@ -66,26 +69,72 @@ enum : u32
MEMCTRL_REG_COUNT = 9
};
void Initialize();
/// Tick cost charged for one RAM read; shared by the data access, instruction fetch and
/// direct-pointer paths so they stay in agreement.
enum : TickCount
{
RAM_READ_TICKS = 4
};
/// Layout of the memory arena and the size of the fastmem address range.
enum : size_t
{
// Size of the scratchpad region in the arena; larger than SCRATCHPAD_SIZE.
// NOTE(review): presumably padded to 64K because of Windows' mapping granularity - confirm.
FASTMEM_SCRATCHPAD_SIZE = 0x10000,
// Our memory arena contains storage for RAM, scratchpad and BIOS.
MEMORY_ARENA_SIZE = RAM_SIZE + FASTMEM_SCRATCHPAD_SIZE + BIOS_SIZE,
// Offsets within the memory arena (regions are laid out back to back in this order).
MEMORY_ARENA_RAM_OFFSET = 0,
MEMORY_ARENA_SCRATCHPAD_OFFSET = MEMORY_ARENA_RAM_OFFSET + RAM_SIZE,
MEMORY_ARENA_BIOS_OFFSET = MEMORY_ARENA_SCRATCHPAD_OFFSET + FASTMEM_SCRATCHPAD_SIZE,
// Fastmem region size is 4GB to cover the entire 32-bit address space.
// NOTE(review): this value does not fit a 32-bit size_t - presumably 64-bit hosts only; confirm.
FASTMEM_REGION_SIZE = UINT64_C(0x100000000)
};
bool Initialize();
void Shutdown();
void Reset();
bool DoState(StateWrapper& sw);
u8* GetFastmemBase();
void UpdateFastmemViews(bool enabled, bool isolate_cache);
void SetExpansionROM(std::vector<u8> data);
void SetBIOS(const std::vector<u8>& image);
extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
extern u8* g_ram; // 2MB RAM
extern u8* g_bios; // 512K BIOS ROM
extern u8* g_scratchpad; // 1KB scratchpad as 4K (in fastmem)
/// Returns true if the physical address falls below the end of the RAM mirrors, i.e. it
/// refers to (writable) RAM.
ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address)
{
  const bool below_mirror_end = (address < RAM_MIRROR_END);
  return below_mirror_end;
}
/// Maps a RAM address to the index of the code page containing it (mirrors are folded
/// onto the same page by masking with RAM_MASK first).
ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address)
{
  const auto ram_offset = (address & RAM_MASK);
  return ram_offset / CPU_CODE_CACHE_PAGE_SIZE;
}
/// Returns true if the specified page contains code.
bool IsRAMCodePage(u32 index);
/// Flags a RAM region as code, so we know when to invalidate blocks.
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
void SetRAMCodePage(u32 index);
/// Unflags a RAM region as code, the code cache will no longer be notified when writes occur.
ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
void ClearRAMCodePage(u32 index);
/// Clears all code bits for RAM regions.
ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); }
void ClearRAMCodePageFlags();
/// Returns true if the specified address is in a code page.
bool IsCodePageAddress(PhysicalMemoryAddress address);
/// Returns true if the range specified overlaps with a code page.
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size);
/// Returns the number of cycles stolen by DMA RAM access.
ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
@ -97,16 +146,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
}
/// Invalidates any code pages which overlap the specified range.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
const u32 end_page = (address + word_count * sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE;
for (u32 page = start_page; page <= end_page; page++)
{
if (m_ram_code_bits[page])
CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
}
}
} // namespace Bus

View File

@ -5,6 +5,7 @@
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"
Log_SetChannel(CPU::CodeCache);
@ -35,14 +36,8 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
static JitCodeBuffer s_code_buffer;
enum : u32
{
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
CodeBlock::HostCodePointer s_asm_dispatcher;
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
{
@ -51,6 +46,7 @@ ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
((pc & Bus::RAM_MASK) >> 2);
}
static void CompileDispatcher();
static void FastCompileBlockFunction();
static void ResetFastMap()
@ -66,6 +62,7 @@ static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
#endif
using BlockMap = std::unordered_map<u32, CodeBlock*>;
using HostCodeMap = std::map<CodeBlock::HostCodePointer, CodeBlock*>;
void LogCurrentState();
@ -90,16 +87,26 @@ static void LinkBlock(CodeBlock* from, CodeBlock* to);
/// Unlink all blocks which point to this block, and any that this block links to.
static void UnlinkBlock(CodeBlock* block);
static bool s_use_recompiler = false;
static BlockMap s_blocks;
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
void Initialize(bool use_recompiler)
#ifdef WITH_RECOMPILER
static HostCodeMap s_host_code_map;
static void AddBlockToHostCodeMap(CodeBlock* block);
static void RemoveBlockFromHostCodeMap(CodeBlock* block);
static bool InitializeFastmem();
static void ShutdownFastmem();
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
#endif
void Initialize()
{
Assert(s_blocks.empty());
#ifdef WITH_RECOMPILER
s_use_recompiler = use_recompiler;
if (g_settings.IsUsingRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
@ -110,15 +117,19 @@ void Initialize(bool use_recompiler)
Panic("Failed to initialize code space");
}
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
ResetFastMap();
#else
s_use_recompiler = false;
CompileDispatcher();
}
#endif
}
void Shutdown()
{
Flush();
ShutdownFastmem();
#ifdef WITH_RECOMPILER
s_code_buffer.Destroy();
#endif
@ -131,18 +142,17 @@ static void ExecuteImpl()
while (!g_state.frame_done)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
next_block_key = GetNextBlockKey();
}
CodeBlock* block = LookupBlock(next_block_key);
if (!block)
{
@ -152,6 +162,7 @@ static void ExecuteImpl()
}
reexecute_block:
Assert(!(HasPendingInterrupt()));
#if 0
const u32 tick = TimingEvents::GetGlobalTickCounter() + CPU::GetPendingTicks();
@ -170,7 +181,7 @@ static void ExecuteImpl()
if (g_state.pending_ticks >= g_state.downcount)
break;
else if (HasPendingInterrupt() || !USE_BLOCK_LINKING)
else if (!USE_BLOCK_LINKING)
continue;
next_block_key = GetNextBlockKey();
@ -237,13 +248,21 @@ void Execute()
#ifdef WITH_RECOMPILER
void CompileDispatcher()
{
Recompiler::CodeGenerator cg(&s_code_buffer);
s_asm_dispatcher = cg.CompileDispatcher();
}
/// Returns a pointer to the first slot of the fast map array.
CodeBlock::HostCodePointer* GetFastMapPointer()
{
  return &s_fast_map[0];
}
void ExecuteRecompiler()
{
#if 0
while (!g_state.frame_done)
{
TimingEvents::UpdateCPUDowncount();
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt())
{
@ -251,6 +270,10 @@ void ExecuteRecompiler()
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
while (g_state.pending_ticks < g_state.downcount)
{
const u32 pc = g_state.regs.pc;
g_state.current_instruction_pc = pc;
const u32 fast_map_index = GetFastMapIndex(pc);
@ -259,6 +282,9 @@ void ExecuteRecompiler()
TimingEvents::RunEvents();
}
#else
s_asm_dispatcher();
#endif
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
@ -266,14 +292,33 @@ void ExecuteRecompiler()
#endif
void SetUseRecompiler(bool enable)
void Reinitialize()
{
#ifdef WITH_RECOMPILER
if (s_use_recompiler == enable)
return;
s_use_recompiler = enable;
Flush();
#ifdef WITH_RECOMPILER
ShutdownFastmem();
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
#else
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
#endif
{
Panic("Failed to initialize code space");
}
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
ResetFastMap();
CompileDispatcher();
}
#endif
}
@ -285,10 +330,13 @@ void Flush()
for (const auto& it : s_blocks)
delete it.second;
s_blocks.clear();
#ifdef WITH_RECOMPILER
s_host_code_map.clear();
s_code_buffer.Reset();
ResetFastMap();
CompileDispatcher();
#endif
}
@ -344,6 +392,8 @@ CodeBlock* LookupBlock(CodeBlockKey key)
}
iter = s_blocks.emplace(key.bits, block).first;
AddBlockToHostCodeMap(block);
return block;
}
@ -370,6 +420,8 @@ bool RevalidateBlock(CodeBlock* block)
return true;
recompile:
RemoveBlockFromHostCodeMap(block);
block->instructions.clear();
if (!CompileBlock(block))
{
@ -379,6 +431,7 @@ recompile:
}
// re-add to page map again
AddBlockToHostCodeMap(block);
if (block->IsInRAM())
AddBlockToPageMap(block);
@ -425,6 +478,9 @@ bool CompileBlock(CodeBlock* block)
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
}
block->contains_loadstore_instructions |= cbi.is_load_instruction;
block->contains_loadstore_instructions |= cbi.is_store_instruction;
// instruction is decoded now
block->instructions.push_back(cbi);
pc += sizeof(cbi.instruction.bits);
@ -467,7 +523,7 @@ bool CompileBlock(CodeBlock* block)
}
#ifdef WITH_RECOMPILER
if (s_use_recompiler)
if (g_settings.IsUsingRecompiler())
{
// Ensure we're not going to run out of space while compiling this block.
if (s_code_buffer.GetFreeCodeSpace() <
@ -538,6 +594,9 @@ void FlushBlock(CodeBlock* block)
RemoveBlockFromPageMap(block);
UnlinkBlock(block);
#ifdef WITH_RECOMPILER
RemoveBlockFromHostCodeMap(block);
#endif
s_blocks.erase(iter);
delete block;
@ -599,4 +658,107 @@ void UnlinkBlock(CodeBlock* block)
block->link_successors.clear();
}
#ifdef WITH_RECOMPILER
/// Registers a block in the host-code map, keyed by its host code pointer.
/// Does nothing unless the recompiler is the active execution mode. Asserts that the key was not already present.
void AddBlockToHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const auto insert_result = s_host_code_map.emplace(block->host_code, block);
    Assert(insert_result.second);
  }
}
/// Removes a block's entry from the host-code map, looked up by its host code pointer.
/// Does nothing unless the recompiler is the active execution mode. Asserts that the entry exists.
void RemoveBlockFromHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const HostCodeMap::iterator entry = s_host_code_map.find(block->host_code);
    Assert(entry != s_host_code_map.end());
    s_host_code_map.erase(entry);
  }
}
/// Installs the page fault handler and, on success, asks the bus to update its fastmem views using the
/// current SR.Isc bit. Returns false (after logging) if the handler could not be installed.
bool InitializeFastmem()
{
  const bool handler_installed = Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler);
  if (handler_installed)
  {
    Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
    return true;
  }

  Log_ErrorPrintf("Failed to install page fault handler");
  return false;
}
/// Undoes InitializeFastmem(): removes the page fault handler, passing the same &s_host_code_map pointer that
/// was given to InstallHandler(), then calls Bus::UpdateFastmemViews() with both arguments false.
/// NOTE(review): the first argument is presumably the "fastmem enabled" flag - confirm against Bus.
void ShutdownFastmem()
{
Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
Bus::UpdateFastmemViews(false, false);
}
/// Page fault callback registered by InitializeFastmem().
///
/// exception_pc is the host address of the faulting instruction, fault_address is the host address that was
/// accessed, and is_write tells whether the access was a store.
///
/// Returns ContinueExecution after either invalidating the blocks on a written RAM code page, or successfully
/// backpatching the faulting load/store. Returns ExecuteNextHandler when the fault lies outside the fastmem
/// region, when no block can be associated with exception_pc, or when backpatching fails.
Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
// Faults outside [fastmem_base, fastmem_base + FASTMEM_REGION_SIZE) do not belong to us.
if (static_cast<u8*>(fault_address) < g_state.fastmem_base ||
(static_cast<u8*>(fault_address) - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE)
{
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
// Offset of the access from the start of the fastmem region.
const PhysicalMemoryAddress fastmem_address =
static_cast<PhysicalMemoryAddress>(static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - g_state.fastmem_base));
Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
is_write ? "(write)" : "(read)", fastmem_address);
// Only writes to RAM while SR.Isc is clear are considered for the code-page path.
if (is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(fastmem_address))
{
// this is probably a code page, since we aren't going to fault due to requiring fastmem on RAM.
const u32 code_page_index = Bus::GetRAMCodePageIndex(fastmem_address);
if (Bus::IsRAMCodePage(code_page_index))
{
InvalidateBlocksWithPageIndex(code_page_index);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
}
// use upper_bound to find the next block after the pc
HostCodeMap::iterator upper_iter =
s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
// If the first entry already starts after the pc, no registered block can contain it.
if (upper_iter == s_host_code_map.begin())
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
// then decrement it by one to (hopefully) get the block we want
upper_iter--;
// find the loadstore info in the code block
CodeBlock* block = upper_iter->second;
for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end();
++bpi_iter)
{
const Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter;
// Entries are matched on the exact host address of the faulting instruction.
if (lbi.host_pc == exception_pc)
{
// found it, do fixup
if (Recompiler::CodeGenerator::BackpatchLoadStore(lbi))
{
// remove the backpatch entry since we won't be coming back to this one
// (erasing invalidates bpi_iter and lbi, which is fine because we return immediately)
block->loadstore_backpatch_info.erase(bpi_iter);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
else
{
Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
}
}
// we didn't find the pc in our list..
Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
#endif
} // namespace CPU::CodeCache

View File

@ -1,14 +1,28 @@
#pragma once
#include "bus.h"
#include "common/bitfield.h"
#include "common/jit_code_buffer.h"
#include "common/page_fault_handler.h"
#include "cpu_types.h"
#include <array>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
#ifdef WITH_RECOMPILER
#include "cpu_recompiler_types.h"
#endif
namespace CPU {
// Slot counts for the code cache's fast map. Each region gets one slot per 4 bytes of its size, and the
// total is the RAM slots plus the BIOS slots.
enum : u32
{
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
};
union CodeBlockKey
{
u32 bits;
@ -63,6 +77,12 @@ struct CodeBlock
TickCount uncached_fetch_ticks = 0;
u32 icache_line_count = 0;
#ifdef WITH_RECOMPILER
std::vector<Recompiler::LoadStoreBackpatchInfo> loadstore_backpatch_info;
#endif
bool contains_loadstore_instructions = false;
bool invalidated = false;
const u32 GetPC() const { return key.GetPC(); }
@ -81,11 +101,12 @@ struct CodeBlock
namespace CodeCache {
void Initialize(bool use_recompiler);
void Initialize();
void Shutdown();
void Execute();
#ifdef WITH_RECOMPILER
CodeBlock::HostCodePointer* GetFastMapPointer();
void ExecuteRecompiler();
#endif
@ -93,7 +114,7 @@ void ExecuteRecompiler();
void Flush();
/// Changes whether the recompiler is enabled.
void SetUseRecompiler(bool enable);
void Reinitialize();
/// Invalidates all blocks which are in the range of the specified code page.
void InvalidateBlocksWithPageIndex(u32 page_index);
@ -102,6 +123,18 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block);
void InterpretUncachedBlock();
/// Invalidates any code pages which overlap the specified range.
/// Invalidates any code pages which overlap the range of word_count 32-bit words starting at address.
/// Pages which are not flagged in Bus::m_ram_code_bits are skipped. An empty range (word_count == 0) is a no-op.
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
{
  // Nothing was written, so no page can have been touched.
  if (word_count == 0)
    return;

  const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;

  // The range is [address, address + word_count * 4). The last page touched is the one holding the final word,
  // not the one holding the exclusive end address. Using the exclusive end would invalidate an extra page
  // whenever the range ends exactly on a page boundary, and would index m_ram_code_bits one entry too far.
  const u32 end_page = (address + (word_count - 1) * sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE;

  for (u32 page = start_page; page <= end_page; page++)
  {
    if (Bus::m_ram_code_bits[page])
      CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
  }
}
}; // namespace CodeCache
} // namespace CPU

View File

@ -1,4 +1,5 @@
#include "cpu_core.h"
#include "bus.h"
#include "common/align.h"
#include "common/file_system.h"
#include "common/log.h"
@ -123,7 +124,6 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value);
sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
if (!GTE::DoState(sw))
return false;
@ -231,7 +231,16 @@ void RaiseException(u32 CAUSE_bits, u32 EPC)
/// Sets the given bit in CAUSE.Ip.
/// In interpreter mode this only arms interrupt_delay. In every other execution mode interrupt_delay is cleared
/// and CheckForPendingInterrupt() is called straight away.
void SetExternalInterrupt(u8 bit)
{
  g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);

  if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter)
  {
    g_state.interrupt_delay = 1;
    return;
  }

  g_state.interrupt_delay = 0;
  CheckForPendingInterrupt();
}
void ClearExternalInterrupt(u8 bit)
@ -397,6 +406,7 @@ ALWAYS_INLINE_RELEASE static void WriteCop0Reg(Cop0Reg reg, u32 value)
g_state.cop0_regs.sr.bits =
(g_state.cop0_regs.sr.bits & ~Cop0Registers::SR::WRITE_MASK) | (value & Cop0Registers::SR::WRITE_MASK);
Log_DebugPrintf("COP0 SR <- %08X (now %08X)", value, g_state.cop0_regs.sr.bits);
CheckForPendingInterrupt();
}
break;
@ -405,6 +415,7 @@ ALWAYS_INLINE_RELEASE static void WriteCop0Reg(Cop0Reg reg, u32 value)
g_state.cop0_regs.cause.bits =
(g_state.cop0_regs.cause.bits & ~Cop0Registers::CAUSE::WRITE_MASK) | (value & Cop0Registers::CAUSE::WRITE_MASK);
Log_DebugPrintf("COP0 CAUSE <- %08X (now %08X)", value, g_state.cop0_regs.cause.bits);
CheckForPendingInterrupt();
}
break;
@ -1218,6 +1229,7 @@ restart_instruction:
// restore mode
g_state.cop0_regs.sr.mode_bits =
(g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 2);
CheckForPendingInterrupt();
}
break;
@ -1367,6 +1379,21 @@ restart_instruction:
}
}
void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
GTE::ExecuteInstruction(g_state.next_instruction.bits);
// Interrupt raising occurs before the start of the instruction.
RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc);
}
template<PGXPMode pgxp_mode>
static void ExecuteImpl()
{
@ -1376,9 +1403,10 @@ static void ExecuteImpl()
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt())
if (HasPendingInterrupt() && !g_state.interrupt_delay)
DispatchInterrupt();
g_state.interrupt_delay = false;
g_state.pending_ticks++;
// now executing the instruction we previously fetched
@ -1536,6 +1564,11 @@ bool InterpretInstructionPGXP()
return g_state.exception_raised;
}
// Thunk for recompiled code: Compile_cop0 emits a call to this when a write to SR flips bit 16 and
// g_settings.cpu_fastmem is set. Asks the bus to update its fastmem views using the current SR.Isc value.
void UpdateFastmemMapping()
{
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
}
} // namespace Recompiler::Thunks
} // namespace CPU

View File

@ -79,8 +79,8 @@ struct State
// GTE registers are stored here so we can access them on ARM with a single instruction
GTE::Regs gte_regs = {};
// data cache (used as scratchpad)
std::array<u8, DCACHE_SIZE> dcache = {};
u8* fastmem_base = nullptr;
std::array<u32, ICACHE_LINES> icache_tags = {};
std::array<u8, ICACHE_SIZE> icache_data = {};
};

View File

@ -1,5 +1,6 @@
#pragma once
#include "cpu_core.h"
#include "bus.h"
namespace CPU {
@ -7,33 +8,20 @@ namespace CPU {
void RaiseException(Exception excode);
void RaiseException(u32 CAUSE_bits, u32 EPC);
ALWAYS_INLINE static bool HasPendingInterrupt()
ALWAYS_INLINE bool HasPendingInterrupt()
{
// const bool do_interrupt = g_state.m_cop0_regs.sr.IEc && ((g_state.m_cop0_regs.cause.Ip & g_state.m_cop0_regs.sr.Im)
// != 0);
const bool do_interrupt = g_state.cop0_regs.sr.IEc &&
return g_state.cop0_regs.sr.IEc &&
(((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
const bool interrupt_delay = g_state.interrupt_delay;
g_state.interrupt_delay = false;
return do_interrupt && !interrupt_delay;
}
ALWAYS_INLINE static void DispatchInterrupt()
ALWAYS_INLINE void CheckForPendingInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
if (g_state.next_instruction.IsCop2Instruction())
return;
// Interrupt raising occurs before the start of the instruction.
RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc);
if (HasPendingInterrupt())
g_state.downcount = 0;
}
void DispatchInterrupt();
// icache stuff
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
{
@ -72,5 +60,7 @@ bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
} // namespace CPU

View File

@ -19,8 +19,7 @@ u32 CodeGenerator::CalculateRegisterOffset(Reg reg)
return u32(offsetof(State, regs.r[0]) + (static_cast<u32>(reg) * sizeof(u32)));
}
bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code,
u32* out_host_code_size)
bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
{
// TODO: Align code buffer.
@ -40,8 +39,10 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif
m_current_instruction = cbi;
if (!CompileInstruction(*cbi))
{
m_current_instruction = nullptr;
m_block_end = nullptr;
m_block_start = nullptr;
m_block = nullptr;
@ -60,6 +61,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);
m_current_instruction = nullptr;
m_block_end = nullptr;
m_block_start = nullptr;
m_block = nullptr;
@ -845,8 +847,8 @@ void CodeGenerator::BlockPrologue()
// we don't know the state of the last block, so assume load delays might be in progress
// TODO: Pull load delay into register cache
m_current_instruction_in_branch_delay_slot_dirty = true;
m_branch_was_taken_dirty = true;
m_current_instruction_in_branch_delay_slot_dirty = g_settings.cpu_recompiler_memory_exceptions;
m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
m_current_instruction_was_branch_taken_dirty = false;
m_load_delay_dirty = true;
@ -909,7 +911,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
return;
}
if (cbi.is_branch_delay_slot)
if (cbi.is_branch_delay_slot && g_settings.cpu_recompiler_memory_exceptions)
{
// m_current_instruction_in_branch_delay_slot = true
EmitStoreCPUStructField(offsetof(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
@ -1895,7 +1897,22 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
value = AndValues(value, Value::FromConstantU32(write_mask));
}
// changing SR[Isc] needs to update fastmem views
if (reg == Cop0Reg::SR && g_settings.cpu_fastmem)
{
LabelType skip_fastmem_update;
Value old_value = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
EmitStoreCPUStructField(offset, value);
EmitXor(old_value.host_reg, old_value.host_reg, value);
EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_fastmem_update);
EmitFunctionCall(nullptr, &Thunks::UpdateFastmemMapping, m_register_cache.GetCPUPtr());
EmitBindLabel(&skip_fastmem_update);
}
else
{
EmitStoreCPUStructField(offset, value);
}
}
}
@ -1913,21 +1930,8 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
EmitBranchIfBitClear(sr_value.host_reg, sr_value.size, 0, &no_interrupt);
EmitAnd(sr_value.host_reg, sr_value.host_reg, cause_value);
EmitTest(sr_value.host_reg, Value::FromConstantU32(0xFF00));
sr_value.ReleaseAndClear();
cause_value.ReleaseAndClear();
EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
EmitBranch(GetCurrentFarCodePointer());
SwitchToFarCode();
// we want to flush pc here
m_register_cache.PushState();
m_register_cache.FlushAllGuestRegisters(false, true);
WriteNewPC(CalculatePC(), false);
EmitExceptionExit();
m_register_cache.PopState();
SwitchToNearCode();
EmitStoreCPUStructField(offsetof(State, downcount), Value::FromConstantU32(0));
EmitBindLabel(&no_interrupt);
}
@ -1962,6 +1966,16 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
EmitStoreCPUStructField(offsetof(State, cop0_regs.sr.bits), sr);
Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, offsetof(State, cop0_regs.cause.bits));
LabelType no_interrupt;
EmitAnd(sr.host_reg, sr.host_reg, cause_value);
EmitTest(sr.host_reg, Value::FromConstantU32(0xFF00));
EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
EmitStoreCPUStructField(offsetof(State, downcount), Value::FromConstantU32(0));
EmitBindLabel(&no_interrupt);
InstructionEpilogue(cbi);
return true;
}

View File

@ -23,7 +23,11 @@ public:
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
CodeBlock::HostCodePointer CompileDispatcher();
//////////////////////////////////////////////////////////////////////////
// Code Generation
@ -67,10 +71,15 @@ public:
void EmitAddCPUStructField(u32 offset, const Value& value);
void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr);
void EmitStoreGlobal(void* ptr, const Value& value);
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code);
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code);
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
@ -201,9 +210,10 @@ private:
bool Compile_cop2(const CodeBlockInstruction& cbi);
JitCodeBuffer* m_code_buffer;
const CodeBlock* m_block = nullptr;
CodeBlock* m_block = nullptr;
const CodeBlockInstruction* m_block_start = nullptr;
const CodeBlockInstruction* m_block_end = nullptr;
const CodeBlockInstruction* m_current_instruction = nullptr;
RegisterCache m_register_cache;
CodeEmitter m_near_emitter;
CodeEmitter m_far_emitter;

View File

@ -6,6 +6,7 @@
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(CPU::Recompiler);
namespace a64 = vixl::aarch64;
@ -13,6 +14,7 @@ namespace a64 = vixl::aarch64;
namespace CPU::Recompiler {
constexpr HostReg RCPUPTR = 19;
constexpr HostReg RMEMBASEPTR = 20;
constexpr HostReg RRETURN = 0;
constexpr HostReg RARG1 = 0;
constexpr HostReg RARG2 = 1;
@ -26,6 +28,16 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
// Returns the signed distance from current to target in units of 4 bytes (byte difference shifted right by 2).
// Both pointers are asserted to be 4-byte aligned.
static s64 GetPCDisplacement(const void* current, const void* target)
{
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));

  const ptrdiff_t target_addr = reinterpret_cast<ptrdiff_t>(target);
  const ptrdiff_t current_addr = reinterpret_cast<ptrdiff_t>(current);
  return static_cast<s64>((target_addr - current_addr) >> 2);
}
static const a64::WRegister GetHostReg8(HostReg reg)
{
return a64::WRegister(reg);
@ -75,6 +87,11 @@ static const a64::XRegister GetCPUPtrReg()
return GetHostReg64(RCPUPTR);
}
// Returns the 64-bit view of host register RMEMBASEPTR, which EmitBeginBlock() loads from State::fastmem_base
// when the block contains load/store instructions.
static const a64::XRegister GetFastmemBasePtrReg()
{
return GetHostReg64(RMEMBASEPTR);
}
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
@ -172,19 +189,31 @@ void CodeGenerator::EmitBeginBlock()
// Save the link register, since we'll be calling functions.
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
DebugAssert(link_reg_allocated);
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer. TODO: make this better.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// If there's loadstore instructions, preload the fastmem base.
if (m_block->contains_loadstore_instructions)
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
}
}
// Emits the normal block exit: releases the fixed registers, pops the callee-saved registers, removes the
// stack frame and returns to the caller.
void CodeGenerator::EmitEndBlock()
{
// RMEMBASEPTR is only allocated by EmitBeginBlock() when the block contains load/store instructions,
// so it is only freed under the same condition.
if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.PopCalleeSavedRegisters(true);
// Drop the stack space reserved for this block.
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// Disabled alternative: branch directly to the dispatcher return address instead of returning.
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -200,6 +229,7 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -958,13 +988,6 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
m_register_cache.PopCallerSavedRegisters();
}
static s64 GetBranchDisplacement(const void* current, const void* target)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
}
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
{
if (return_value)
@ -974,7 +997,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
const u32 adjust_size = PrepareStackForCall();
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1009,7 +1032,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG1, arg1);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1045,7 +1068,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG2, arg2);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1083,7 +1106,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG3, arg3);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1122,7 +1145,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
EmitCopyValue(RARG4, arg4);
// actually call the function
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
const bool use_blr = !vixl::IsInt26(displacement);
if (use_blr)
{
@ -1283,14 +1306,124 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
// Emits code to load a value of the given size from guest memory at address, and returns the Value holding it.
//
// Three paths are possible:
//  - Constant address for which GetDirectReadMemoryPointer() returns a host pointer: a direct load from that
//    pointer is emitted and the returned read ticks are added to m_delayed_cycles_add.
//  - Fastmem enabled: EmitLoadGuestMemoryFastmem() is used.
//  - Otherwise: caller-saved guest registers are flushed and EmitLoadGuestMemorySlowmem() is used.
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
if (address.IsConstant())
{
TickCount read_ticks;
void* ptr = GetDirectReadMemoryPointer(
static_cast<u32>(address.constant_value),
(size == RegSize_8) ? MemoryAccessSize::Byte :
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
&read_ticks);
// A null pointer means there is no direct pointer for this address, so fall through to the general paths.
if (ptr)
{
Value result = m_register_cache.AllocateScratch(size);
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
m_delayed_cycles_add += read_ticks;
return result;
}
}
AddPendingCycles(true);
// Allocated as 64-bit regardless of the access size; it is narrowed to the requested size below.
Value result = m_register_cache.AllocateScratch(RegSize_64);
if (g_settings.IsUsingFastmem())
{
EmitLoadGuestMemoryFastmem(cbi, address, size, result);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
// Emits a fastmem load: a single load relative to the fastmem base register, a slowmem fallback in far code,
// and a LoadStoreBackpatchInfo entry on the block describing both. The loaded value ends up in result.
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
if (address.IsConstant())
{
// Materialise the constant address in the result register, which is then used as the offset register.
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
// Re-take host_pc so it points at the load emitted below, not at the Mov above.
bpi.host_pc = GetCurrentNearCodePointer();
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
}
// TODO: movsx/zx inline here
switch (size)
{
case RegSize_8:
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
break;
case RegSize_16:
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
break;
case RegSize_32:
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
break;
default:
UnreachableCode();
break;
}
// The fast path always charges Bus::RAM_READ_TICKS to pending_ticks.
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
// Size in bytes of the near code from host_pc up to here, i.e. the load plus the tick update.
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
// NOTE: This can leave junk in the upper bits
switch (size)
{
@ -1319,6 +1452,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->Bind(&load_okay);
// load exception path
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (-result << 2) | BD | cop_n
@ -1330,37 +1464,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
else
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (size)
{
case RegSize_8:
@ -1379,19 +1490,73 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
UnreachableCode();
break;
}
}
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
if (address.IsConstant())
{
void* ptr = GetDirectWriteMemoryPointer(
static_cast<u32>(address.constant_value),
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
if (ptr)
{
EmitStoreGlobal(ptr, value);
return;
}
}
AddPendingCycles(true);
if (g_settings.IsUsingFastmem())
{
// we need the value in a host register to store it
Value value_in_hr = GetValueInHostRegister(value);
EmitStoreGuestMemoryFastmem(cbi, address, value_in_hr);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
}
}
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
a64::MemOperand actual_address;
if (address.IsConstant())
{
m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH));
bpi.host_pc = GetCurrentNearCodePointer();
}
else
{
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
}
switch (value.size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
m_emit->Strb(GetHostReg8(value), actual_address);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
m_emit->Strh(GetHostReg16(value), actual_address);
break;
case RegSize_32:
m_emit->Str(GetHostReg32(value), actual_address);
break;
default:
@ -1399,19 +1564,33 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
break;
}
return result;
}
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
// return to the block code
EmitBranch(GetCurrentNearCodePointer(), false);
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value, bool in_far_code)
{
AddPendingCycles(true);
if (g_settings.cpu_recompiler_memory_exceptions)
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
Assert(!in_far_code);
Value result = m_register_cache.AllocateScratch(RegSize_32);
switch (value.size)
{
case RegSize_8:
@ -1439,6 +1618,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->Bind(&store_okay);
// store exception path
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (result << 2) | BD | cop_n
@ -1448,6 +1628,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
if (!in_far_code)
EmitExceptionExit();
SwitchToNearCode();
@ -1455,8 +1636,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (value.size)
{
case RegSize_8:
@ -1478,14 +1657,76 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
}
// Rewrites a previously emitted fastmem access so that it branches to its slowmem fallback instead.
// lbi.host_pc / lbi.host_code_size delimit the region that is overwritten, lbi.host_slowmem_pc is the
// branch target. Always returns true; an unaligned or unencodable branch distance trips an Assert.
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
// check jump distance: it must be a multiple of 4 bytes and, expressed in instructions (>> 2),
// must be a valid immediate for an unconditional branch
const s64 jump_distance =
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.host_slowmem_pc) - reinterpret_cast<intptr_t>(lbi.host_pc));
Assert(Common::IsAligned(jump_distance, 4));
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
// turn it into a jump to the slowmem handler, assembling directly over the original code
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size,
a64::PositionDependentCode);
emit.b(jump_distance >> 2);
// pad whatever is left of the patched region with nops (one nop per remaining 4 bytes)
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
emit.nop();
// the code bytes were modified in place, so flush the instruction cache for the patched range
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
return true;
}
// Emits a load of `size` bits from the host address `ptr` into `host_reg`.
// The address is first materialised in RSCRATCH, which is therefore clobbered.
// Only 8, 16 and 32-bit loads are supported; any other size hits UnreachableCode().
//
// The stale Panic("Not implemented") that used to sit at the top of this function has been removed:
// it ran unconditionally, so the implementation below could never be reached.
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
  EmitLoadGlobalAddress(RSCRATCH, ptr);

  switch (size)
  {
    case RegSize_8:
      m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_16:
      m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_32:
      m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    default:
      UnreachableCode();
      break;
  }
}
// Emits a store of `value` (8, 16 or 32 bits wide, per value.size) to the host address `ptr`.
// The value is first moved into a host register if it is not already in one, and the target address
// is materialised in RSCRATCH, which is therefore clobbered. Other sizes hit UnreachableCode().
//
// The stale Panic("Not implemented") that used to sit at the top of this function has been removed:
// it ran unconditionally, so every direct-pointer store routed here by EmitStoreGuestMemory() would
// have aborted instead of emitting the store below.
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
  Value value_in_hr = GetValueInHostRegister(value);
  EmitLoadGlobalAddress(RSCRATCH, ptr);

  switch (value.size)
  {
    case RegSize_8:
      m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_16:
      m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_32:
      m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    default:
      UnreachableCode();
      break;
  }
}
void CodeGenerator::EmitFlushInterpreterLoadDelay()
@ -1814,4 +2055,136 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->Bind(label);
}
// Emits code that places the host address `ptr` into the 64-bit view of `host_reg`.
// Prefers a two-instruction adrp + orr sequence relative to the current code position and falls
// back to a full 64-bit immediate Mov when that sequence cannot encode the address.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
// 4KiB page (low 12 bits cleared) containing the current emit position
const void* current_code_ptr_page = reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(GetCurrentCodePointer()) & ~static_cast<uintptr_t>(0xFFF));
// 4KiB page containing the target address
const void* ptr_page =
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(ptr) & ~static_cast<uintptr_t>(0xFFF));
// NOTE(review): the >> 10 presumably combines with a >> 2 inside GetPCDisplacement() to yield a
// displacement in pages (>> 12) - confirm against GetPCDisplacement's definition.
const s64 page_displacement = GetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
// offset of the target within its page
const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFu);
// adrp needs a displacement that fits in 21 signed bits, and the orr needs the in-page offset to be
// encodable as a logical immediate; if either check fails use the generic Mov below
if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
{
m_emit->adrp(GetHostReg64(host_reg), page_displacement);
m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset);
}
else
{
m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr));
}
}
// Generates the AArch64 dispatcher: the host routine that checks for frame completion and pending
// interrupts, refreshes the downcount from the head timing event, looks up the compiled block for
// the current guest PC in the fast map and calls it, and runs timing events once the downcount is hit.
// Returns the pointer produced by FinalizeBlock() for the generated code.
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
{
// reserve the stack frame, push the callee-saved registers and load &g_state into RCPUPTR
m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
m_register_cache.ReserveCallerSavedRegisters();
EmitLoadGlobalAddress(RCPUPTR, &g_state);
a64::Label frame_done_loop;
a64::Label exit_dispatcher;
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
m_emit->tbnz(a64::w8, 0, &exit_dispatcher);
// w8 <- sr
a64::Label no_interrupt;
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tbz(a64::w8, 0, &no_interrupt);
// w9 <- cause
// w8 <- w8 (sr) & cause
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a64::w8, a64::w8, a64::w9);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a64::w8, 0xFF00);
m_emit->b(&no_interrupt, a64::eq);
// we have an interrupt
EmitFunctionCall(nullptr, &DispatchInterrupt);
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// x8 <- head event, w8 <- head event->downcount
// downcount <- w8
EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// main dispatch loop
a64::Label main_loop;
m_emit->Bind(&main_loop);
// remember the address of the loop head in s_dispatcher_return_address
s_dispatcher_return_address = GetCurrentCodePointer();
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while pending_ticks < downcount (leave for downcount_hit once pending_ticks >= downcount)
a64::Label downcount_hit;
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&downcount_hit, a64::ge);
// time to lookup the block
// w8 <- pc
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));
// w9 <- (pc & RAM_MASK) >> 2
m_emit->and_(a64::w9, a64::w8, Bus::RAM_MASK);
m_emit->lsr(a64::w9, a64::w9, 2);
// w10 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
m_emit->and_(a64::w10, a64::w8, Bus::BIOS_MASK);
m_emit->lsr(a64::w10, a64::w10, 2);
m_emit->add(a64::w10, a64::w10, FAST_MAP_RAM_SLOT_COUNT);
// current_instruction_pc <- pc (w8)
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));
// if ((w8 (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use w10 as index } else { use w9 as index }
m_emit->and_(a64::w8, a64::w8, PHYSICAL_MEMORY_ADDRESS_MASK);
m_emit->Mov(a64::w11, Bus::BIOS_BASE);
m_emit->cmp(a64::w8, a64::w11);
m_emit->csel(a64::w8, a64::w9, a64::w10, a64::lt);
// w8 contains our index, x9 <- fast map base, x8 <- fast_map[x8 * 8], call x8
EmitLoadGlobalAddress(9, CodeCache::GetFastMapPointer());
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
m_emit->blr(a64::x8);
// NOTE(review): no branch back to main_loop is emitted after the call, so a block that returns
// normally falls through into the downcount_hit path below - confirm this is intended.
// end while
m_emit->Bind(&downcount_hit);
// check events then for frame done: skip RunEvents while pending_ticks < head event->downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x9, a64::MemOperand(a64::x9));
m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount)));
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&frame_done_loop, a64::lt);
EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
m_emit->b(&frame_done_loop);
// all done: restore the saved registers, release the stack frame and return
m_emit->Bind(&exit_dispatcher);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
m_emit->Ret();
CodeBlock::HostCodePointer ptr;
u32 code_size;
FinalizeBlock(&ptr, &code_size);
Log_InfoPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
return ptr;
}
} // namespace CPU::Recompiler

View File

@ -1,14 +1,19 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"
Log_SetChannel(Recompiler::CodeGenerator);
namespace CPU::Recompiler {
#if defined(ABI_WIN64)
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
constexpr HostReg RARG1 = Xbyak::Operand::RCX;
constexpr HostReg RARG2 = Xbyak::Operand::RDX;
@ -18,6 +23,7 @@ constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16;
#elif defined(ABI_SYSV)
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
constexpr HostReg RARG1 = Xbyak::Operand::RDI;
constexpr HostReg RARG2 = Xbyak::Operand::RSI;
@ -76,6 +82,11 @@ static const Xbyak::Reg64 GetCPUPtrReg()
return GetHostReg64(RCPUPTR);
}
// Returns the 64-bit view of the host register designated as the fastmem base pointer (RMEMBASEPTR).
static const Xbyak::Reg64 GetFastmemBasePtrReg()
{
  const HostReg base_reg = RMEMBASEPTR;
  return GetHostReg64(base_reg);
}
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
: m_code_buffer(code_buffer), m_register_cache(*this),
m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
@ -137,7 +148,6 @@ void CodeGenerator::InitHostRegs()
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
#elif defined(ABI_SYSV)
m_register_cache.SetHostRegAllocationOrder(
{Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
@ -151,8 +161,9 @@ void CodeGenerator::InitHostRegs()
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
Xbyak::Operand::R15});
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
#endif
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
}
void CodeGenerator::SwitchToFarCode()
@ -187,15 +198,28 @@ Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_
// Emits the block prologue: marks callee-saved registers as already saved, claims the CPU state
// pointer register and, for blocks containing load/store instructions, claims the fastmem base
// register and loads it from CPU::State::fastmem_base.
void CodeGenerator::EmitBeginBlock()
{
// clears the CalleeSaved flag on registers not yet pushed, so using them in this block emits no pushes
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
// Store the CPU struct pointer.
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
DebugAssert(cpu_reg_allocated);
// NOTE(review): the statement below is identical to the commented-out line that follows it, and
// CompileDispatcher() also loads &g_state into RBP - confirm whether this reload is still wanted.
m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
// If there's loadstore instructions, preload the fastmem base.
if (m_block->contains_loadstore_instructions)
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
}
}
void CodeGenerator::EmitEndBlock()
{
m_register_cache.FreeHostReg(RCPUPTR);
if (m_block->contains_loadstore_instructions)
m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
@ -1738,14 +1762,158 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
// Emits code that loads `size` bits from the guest address `address` and returns the Value holding
// the result. Three paths are possible:
//  - constant address for which GetDirectReadMemoryPointer() yields a host pointer: load through it
//    directly and add the reported read ticks to m_delayed_cycles_add;
//  - fastmem enabled: emit a backpatchable fastmem load;
//  - otherwise: flush caller-saved guest registers and emit the slowmem load.
// The latter two paths load into a 64-bit scratch register that is narrowed to `size` before returning.
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
  if (address.IsConstant())
  {
    // Anything that is not a byte or halfword load is treated as a word load.
    MemoryAccessSize access_size = MemoryAccessSize::Word;
    if (size == RegSize_8)
      access_size = MemoryAccessSize::Byte;
    else if (size == RegSize_16)
      access_size = MemoryAccessSize::HalfWord;

    TickCount read_ticks;
    void* const direct_ptr =
      GetDirectReadMemoryPointer(static_cast<u32>(address.constant_value), access_size, &read_ticks);
    if (direct_ptr != nullptr)
    {
      Value direct_result = m_register_cache.AllocateScratch(size);
      EmitLoadGlobal(direct_result.GetHostRegister(), size, direct_ptr);
      m_delayed_cycles_add += read_ticks;
      return direct_result;
    }
  }

  AddPendingCycles(true);

  Value result = m_register_cache.AllocateScratch(RegSize_64);
  if (!g_settings.IsUsingFastmem())
  {
    m_register_cache.FlushCallerSavedGuestRegisters(true, true);
    EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
  }
  else
  {
    EmitLoadGuestMemoryFastmem(cbi, address, size, result);
  }

  // Downcast to ignore upper 56/48/32 bits. This should be a noop.
  const bool is_supported_size = (size == RegSize_8 || size == RegSize_16 || size == RegSize_32);
  if (is_supported_size)
  {
    ConvertValueSizeInPlace(&result, size, false);
  }
  else
  {
    UnreachableCode();
  }

  return result;
}
// Emits a fastmem load of `size` bits from guest `address` into `result`, i.e. a single mov from
// [fastmem base + address], followed by the pending_ticks update. The emitted region is recorded
// in a LoadStoreBackpatchInfo together with a slowmem fallback generated in far code, so that
// BackpatchLoadStore() can later replace the region with a jump to that fallback.
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = result.host_reg;
bpi.guest_pc = m_current_instruction->pc;
// can't store displacements >= 0x80000000 in-line, so such constant addresses are first moved
// into the result register, which then serves as the index register for the load
const Value* actual_address = &address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
actual_address = &result;
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
// restart the patchable region after the address mov, so the mov itself is never overwritten
bpi.host_pc = GetCurrentNearCodePointer();
}
// TODO: movsx/zx inline here
// NOTE: sizes other than 8/16/32 emit no load at all (the switch has no default case)
switch (size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg8(result.host_reg),
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg16(result.host_reg),
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
}
else
{
m_emit->mov(GetHostReg32(result.host_reg),
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
}
}
break;
}
// TODO: BIOS reads...
// the RAM read cost is added as part of the patchable region
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
// insert nops, we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
for (u32 i = 0; i < nops; i++)
m_emit->nop();
// final size of the patchable region, including any nop padding
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
// return to the block code (the near-code position right after the patchable region)
m_emit->jmp(GetCurrentNearCodePointer());
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
Value& result, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
// We need to use the full 64 bits here since we test the sign bit result.
Value result = m_register_cache.AllocateScratch(RegSize_64);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
// NOTE: This can leave junk in the upper bits
switch (size)
{
@ -1772,6 +1940,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
m_register_cache.PushState();
// load exception path
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (-result << 2) | BD | cop_n
@ -1783,37 +1952,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
return result;
}
else
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (size)
{
case RegSize_8:
@ -1832,39 +1978,183 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
UnreachableCode();
break;
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
break;
default:
UnreachableCode();
break;
}
return result;
}
}
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
{
if (address.IsConstant())
{
void* ptr = GetDirectWriteMemoryPointer(
static_cast<u32>(address.constant_value),
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
if (ptr)
{
EmitStoreGlobal(ptr, value);
return;
}
}
AddPendingCycles(true);
if (g_settings.IsUsingFastmem())
{
EmitStoreGuestMemoryFastmem(cbi, address, value);
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
}
}
// Emits a fastmem store of `value` (8/16/32 bits, per value.size) to guest `address`, i.e. a single
// mov to [fastmem base + address]. Both the address and the value may be either constants or host
// registers. The emitted region is recorded in a LoadStoreBackpatchInfo together with a slowmem
// fallback generated in far code, so that BackpatchLoadStore() can later replace the region with a
// jump to that fallback.
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value)
{
// fastmem
LoadStoreBackpatchInfo bpi;
bpi.host_pc = GetCurrentNearCodePointer();
bpi.address_host_reg = HostReg_Invalid;
bpi.value_host_reg = value.host_reg;
bpi.guest_pc = m_current_instruction->pc;
// can't store displacements >= 0x80000000 in-line, so such constant addresses are first moved
// into RRETURN, which then serves as the index register for the store
const Value* actual_address = &address;
Value temp_address;
if (address.IsConstant() && address.constant_value >= 0x80000000)
{
// NOTE(review): RRETURN is used here without an explicit AllocateHostReg() call in this
// function - confirm it cannot hold a live value at this point.
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
actual_address = &temp_address;
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
// restart the patchable region after the address mov, so the mov itself is never overwritten
bpi.host_pc = GetCurrentNearCodePointer();
}
// NOTE: sizes other than 8/16/32 emit no store at all (the switch has no default case)
switch (value.size)
{
case RegSize_8:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg8(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg8(value.host_reg));
}
}
}
break;
case RegSize_16:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg16(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg16(value.host_reg));
}
}
}
break;
case RegSize_32:
{
if (actual_address->IsConstant())
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
GetHostReg32(value.host_reg));
}
}
else
{
if (value.IsConstant())
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
value.constant_value);
}
else
{
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
GetHostReg32(value.host_reg));
}
}
}
break;
}
// insert nops, we need at least 5 bytes for a relative jump
const u32 fastmem_size =
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
for (u32 i = 0; i < nops; i++)
m_emit->nop();
// final size of the patchable region, including any nop padding
bpi.host_code_size = static_cast<u32>(
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
// generate slowmem fallback
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
SwitchToFarCode();
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
// return to the block code (the near-code position right after the patchable region)
m_emit->jmp(GetCurrentNearCodePointer());
SwitchToNearCode();
m_block->loadstore_backpatch_info.push_back(bpi);
}
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
const Value& value, bool in_far_code)
{
if (g_settings.cpu_recompiler_memory_exceptions)
{
Value result = m_register_cache.AllocateScratch(RegSize_32);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
Assert(!in_far_code);
Value result = m_register_cache.AllocateScratch(RegSize_32);
switch (value.size)
{
case RegSize_8:
@ -1890,24 +2180,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
m_emit->jnz(GetCurrentFarCodePointer());
// store exception path
if (!in_far_code)
SwitchToFarCode();
// cause_bits = (result << 2) | BD | cop_n
m_emit->shl(GetHostReg32(result.host_reg), 2);
m_emit->or_(GetHostReg32(result.host_reg),
m_emit->shl(GetHostReg32(result), 2);
m_emit->or_(GetHostReg32(result),
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
cbi.instruction.cop.cop_n));
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
EmitExceptionExit();
if (!in_far_code)
SwitchToNearCode();
m_register_cache.PopState();
}
else
{
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
switch (value.size)
{
case RegSize_8:
@ -1929,6 +2219,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
}
}
// Rewrites a previously emitted fastmem access so that it jumps to its slowmem fallback instead.
// lbi.host_pc / lbi.host_code_size delimit the region that is overwritten, lbi.host_slowmem_pc is the
// jump target. Always returns true; a region too small for the jump trips the Assert below.
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
{
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
// turn it into a jump to the slowmem handler, assembling directly over the original code
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
cg.jmp(lbi.host_slowmem_pc);
// fill the bytes of the region not covered by the jump with nops
const s32 nops = static_cast<s32>(lbi.host_code_size) -
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(lbi.host_pc)));
Assert(nops >= 0);
for (s32 i = 0; i < nops; i++)
cg.nop();
// the code bytes were modified in place, so flush the instruction cache for the patched range
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
return true;
}
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
const s64 displacement =
@ -2486,4 +2794,123 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
m_emit->L(*label);
}
// Emits code that places the host address `ptr` into the 64-bit view of `host_reg`: a rip-relative
// lea when the displacement computed below fits in a signed 32-bit value, otherwise a mov of the
// full 64-bit immediate.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
// NOTE(review): the displacement is measured from the current emit position plus a bias of 2,
// not from the end of the lea instruction - confirm the bias is conservative enough for the
// encoded rip-relative form.
const s64 displacement =
static_cast<s64>(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())) + 2;
if (Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]);
else
m_emit->mov(GetHostReg64(host_reg), reinterpret_cast<size_t>(ptr));
}
// Generates the x64 dispatcher: the host routine that checks for frame completion and pending
// interrupts, refreshes the downcount from the head timing event, then repeatedly looks up the
// compiled block for the current guest PC in the fast map and calls it until the downcount is hit,
// at which point timing events are run. Returns the pointer produced by FinalizeBlock().
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
{
// push the callee-saved registers and load &g_state into rbp, which addresses State fields below
m_register_cache.ReserveCallerSavedRegisters();
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop;
Xbyak::Label exit_dispatcher;
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitFunctionCall(nullptr, &DispatchInterrupt);
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// rax <- head event, eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
// main dispatch loop (loop head aligned to 16 bytes)
Xbyak::Label main_loop;
m_emit->align(16);
m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block
// eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
// ebx <- (pc & RAM_MASK) >> 2
m_emit->mov(m_emit->ebx, m_emit->eax);
m_emit->and_(m_emit->ebx, Bus::RAM_MASK);
m_emit->shr(m_emit->ebx, 2);
// ecx <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
m_emit->mov(m_emit->ecx, m_emit->eax);
m_emit->and_(m_emit->ecx, Bus::BIOS_MASK);
m_emit->shr(m_emit->ecx, 2);
m_emit->add(m_emit->ecx, FAST_MAP_RAM_SLOT_COUNT);
// current_instruction_pc <- pc (eax)
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, current_instruction_pc)], m_emit->eax);
// if ((eax (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use ecx as index }
m_emit->and_(m_emit->eax, PHYSICAL_MEMORY_ADDRESS_MASK);
m_emit->cmp(m_emit->eax, Bus::BIOS_BASE);
m_emit->cmovge(m_emit->ebx, m_emit->ecx);
// ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
EmitLoadGlobalAddress(Xbyak::Operand::RAX, CodeCache::GetFastMapPointer());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax + m_emit->rbx * 8]);
m_emit->call(m_emit->rax);
m_emit->jmp(main_loop);
// end while
m_emit->L(downcount_hit);
// check events then for frame done: skip RunEvents while head event->downcount > pending_ticks
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
m_emit->jg(frame_done_loop);
EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
m_emit->jmp(frame_done_loop);
// all done: restore the saved registers and return
m_emit->L(exit_dispatcher);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();
CodeBlock::HostCodePointer ptr;
u32 code_size;
FinalizeBlock(&ptr, &code_size);
Log_InfoPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
return ptr;
}
} // namespace CPU::Recompiler

View File

@ -351,6 +351,33 @@ u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
return count;
}
void RegisterCache::ReserveCallerSavedRegisters()
{
for (u32 reg = 0; reg < HostReg_Count; reg++)
{
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
}
}
}
void RegisterCache::AssumeCalleeSavedRegistersAreSaved()
{
for (u32 i = 0; i < HostReg_Count; i++)
{
if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved;
}
}
}
void RegisterCache::PushState()
{
// need to copy this manually because of the load delay values

View File

@ -248,6 +248,12 @@ public:
/// Restore callee-saved registers. Call at the end of the function.
u32 PopCalleeSavedRegisters(bool commit);
/// Pushes every callee-saved host register that has not been saved yet and marks it as allocated,
/// enabling later use of those registers without further stack pushes.
void ReserveCallerSavedRegisters();
/// Removes the callee-saved register flag from all registers which have not been pushed/allocated.
/// Call when compiling code blocks.
void AssumeCalleeSavedRegistersAreSaved();
/// Pushes the register allocator state, use when entering branched code.
void PushState();

View File

@ -32,6 +32,7 @@ void UncheckedWriteMemoryByte(u32 address, u8 value);
void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
void UncheckedWriteMemoryWord(u32 address, u32 value);
void UpdateFastmemMapping();
} // namespace Recompiler::Thunks

View File

@ -127,6 +127,16 @@ constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = false;
#endif
// Describes one fastmem load/store site in generated host code, pairing the host instruction
// which may fault with the location of its slowmem fallback.
// NOTE(review): presumably consumed when backpatching after a fault - confirm against the users
// of this struct.
struct LoadStoreBackpatchInfo
{
void* host_pc; // pointer to instruction which will fault
void* host_slowmem_pc; // pointer to slowmem callback code
u32 host_code_size; // size of the fastmem load as well as the add for cycles
HostReg address_host_reg; // register containing the guest address to load/store
HostReg value_host_reg; // register containing the source/destination
PhysicalMemoryAddress guest_pc; // NOTE(review): presumably the guest PC of the load/store instruction - confirm
};
} // namespace Recompiler
} // namespace CPU

View File

@ -4,6 +4,7 @@
#include "common/log.h"
#include "common/state_wrapper.h"
#include "common/string_util.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
#include "gpu.h"
#include "interrupt_controller.h"
@ -499,7 +500,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
const u32 terminator = UINT32_C(0xFFFFFF);
std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator));
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}
@ -547,6 +548,6 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
}
}
Bus::InvalidateCodePages(address, word_count);
CPU::CodeCache::InvalidateCodePages(address, word_count);
return Bus::GetDMARAMTickCount(word_count);
}

View File

@ -367,6 +367,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
si.SetBoolValue("CPU", "ICache", false);
si.SetBoolValue("CPU", "Fastmem", true);
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
si.SetIntValue("GPU", "ResolutionScale", 1);
@ -513,12 +514,13 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
if (g_settings.emulation_speed != old_settings.emulation_speed)
System::UpdateThrottlePeriod();
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode)
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode ||
g_settings.cpu_fastmem != old_settings.cpu_fastmem)
{
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
CPU::CodeCache::Flush();
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode%s.",
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode),
g_settings.cpu_fastmem ? " (fastmem)" : "");
CPU::CodeCache::Reinitialize();
CPU::ClearICache();
}

View File

@ -96,6 +96,7 @@ void Settings::Load(SettingsInterface& si)
.value_or(DEFAULT_CPU_EXECUTION_MODE);
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true);
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER);
@ -217,6 +218,7 @@ void Settings::Save(SettingsInterface& si) const
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
si.SetBoolValue("CPU", "Fastmem", cpu_fastmem);
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());

View File

@ -73,6 +73,7 @@ struct Settings
bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false;
bool cpu_thread = true;
bool cpu_fastmem = true;
float emulation_speed = 1.0f;
bool speed_limiter_enabled = true;
@ -173,6 +174,11 @@ struct Settings
return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled;
}
/// Returns true only when fastmem is enabled, the CPU execution mode is the recompiler, and
/// recompiler memory exceptions are turned off.
ALWAYS_INLINE bool IsUsingFastmem() const
{
  // Either of these settings rules fastmem out regardless of the execution mode.
  if (!cpu_fastmem || cpu_recompiler_memory_exceptions)
    return false;

  return (cpu_execution_mode == CPUExecutionMode::Recompiler);
}
bool HasAnyPerGameMemoryCards() const;
enum : u32

View File

@ -727,12 +727,14 @@ bool Initialize(bool force_software_renderer)
TimingEvents::Initialize();
CPU::Initialize();
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
Bus::Initialize();
if (!Bus::Initialize())
return false;
CPU::CodeCache::Initialize();
g_gpu.Initialize();
g_dma.Initialize();
g_interrupt_controller.Initialize();
g_cdrom.Initialize();

View File

@ -13,7 +13,6 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0;
static u32 s_last_event_run_time = 0;
u32 GetGlobalTickCounter()
{
@ -28,7 +27,6 @@ void Initialize()
// Zeroes the global tick counter and the last event run time.
void Reset()
{
s_global_tick_counter = 0;
s_last_event_run_time = 0;
}
void Shutdown()
@ -53,6 +51,11 @@ void UpdateCPUDowncount()
CPU::g_state.downcount = s_active_events_head->GetDowncount();
}
// Exposes the address of s_active_events_head, i.e. a pointer to the head pointer itself rather
// than to the event it currently refers to.
TimingEvent** GetHeadEventPtr()
{
  TimingEvent** const head_slot = &s_active_events_head;
  return head_slot;
}
static void SortEvent(TimingEvent* event)
{
const TickCount event_downcount = event->m_downcount;
@ -255,7 +258,7 @@ void RunEvents()
{
DebugAssert(!s_current_event);
TickCount pending_ticks = (s_global_tick_counter + CPU::GetPendingTicks()) - s_last_event_run_time;
TickCount pending_ticks = CPU::GetPendingTicks();
CPU::ResetPendingTicks();
while (pending_ticks > 0)
{
@ -291,7 +294,6 @@ void RunEvents()
}
}
s_last_event_run_time = s_global_tick_counter;
s_current_event = nullptr;
UpdateCPUDowncount();
}
@ -333,8 +335,6 @@ bool DoState(StateWrapper& sw)
event->m_interval = interval;
}
sw.Do(&s_last_event_run_time);
Log_DevPrintf("Loaded %u events from save state.", event_count);
SortEvents();
}
@ -352,8 +352,6 @@ bool DoState(StateWrapper& sw)
sw.Do(&event->m_interval);
}
sw.Do(&s_last_event_run_time);
Log_DevPrintf("Wrote %u events to save state.", s_active_event_count);
}

View File

@ -88,6 +88,8 @@ void RunEvents();
void UpdateCPUDowncount();
/// Returns the address of the active events list head pointer.
TimingEvent** GetHeadEventPtr();
} // namespace TimingEventManager

View File

@ -129,6 +129,6 @@ enum : u32
// Code cache page geometry. The page count is derived by dividing a 0x200000-sized region by the
// page size, so the two values must be changed together.
// NOTE(review): 0x200000 is presumably the size of guest RAM in bytes - confirm.
enum : u32
{
CPU_CODE_CACHE_PAGE_SIZE = 1024,
CPU_CODE_CACHE_PAGE_SIZE = 4096,
CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
};