Merge branch 'fastmem2' into threaded-gpu
This commit is contained in:
commit
9d49ed6299
|
@ -56,6 +56,10 @@ add_library(common
|
|||
minizip_helpers.h
|
||||
null_audio_stream.cpp
|
||||
null_audio_stream.h
|
||||
memory_arena.cpp
|
||||
memory_arena.h
|
||||
page_fault_handler.cpp
|
||||
page_fault_handler.h
|
||||
rectangle.h
|
||||
progress_callback.cpp
|
||||
progress_callback.h
|
||||
|
@ -180,3 +184,8 @@ if(APPLE AND NOT BUILD_LIBRETRO_CORE)
|
|||
gl/context_agl.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
# We need -lrt for shm_open/shm_unlink
|
||||
target_link_libraries(common PRIVATE rt)
|
||||
endif()
|
||||
|
|
|
@ -70,6 +70,8 @@
|
|||
<ClInclude Include="md5_digest.h" />
|
||||
<ClInclude Include="null_audio_stream.h" />
|
||||
<ClInclude Include="progress_callback.h" />
|
||||
<ClInclude Include="memory_arena.h" />
|
||||
<ClInclude Include="page_fault_handler.h" />
|
||||
<ClInclude Include="rectangle.h" />
|
||||
<ClInclude Include="cd_subchannel_replacement.h" />
|
||||
<ClInclude Include="scope_guard.h" />
|
||||
|
@ -130,6 +132,8 @@
|
|||
<ClCompile Include="null_audio_stream.cpp" />
|
||||
<ClCompile Include="progress_callback.cpp" />
|
||||
<ClCompile Include="shiftjis.cpp" />
|
||||
<ClCompile Include="memory_arena.cpp" />
|
||||
<ClCompile Include="page_fault_handler.cpp" />
|
||||
<ClCompile Include="state_wrapper.cpp" />
|
||||
<ClCompile Include="cd_xa.cpp" />
|
||||
<ClCompile Include="string.cpp" />
|
||||
|
|
|
@ -103,6 +103,8 @@
|
|||
<ClInclude Include="win32_progress_callback.h" />
|
||||
<ClInclude Include="make_array.h" />
|
||||
<ClInclude Include="shiftjis.h" />
|
||||
<ClInclude Include="memory_arena.h" />
|
||||
<ClInclude Include="page_fault_handler.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="jit_code_buffer.cpp" />
|
||||
|
@ -198,6 +200,8 @@
|
|||
<ClCompile Include="minizip_helpers.cpp" />
|
||||
<ClCompile Include="win32_progress_callback.cpp" />
|
||||
<ClCompile Include="shiftjis.cpp" />
|
||||
<ClCompile Include="memory_arena.cpp" />
|
||||
<ClCompile Include="page_fault_handler.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Natvis Include="bitfield.natvis" />
|
||||
|
|
|
@ -0,0 +1,213 @@
|
|||
#include "memory_arena.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "common/string_util.h"
|
||||
Log_SetChannel(Common::MemoryArena);
|
||||
|
||||
#if defined(WIN32)
|
||||
#include "common/windows_headers.h"
|
||||
#elif defined(__linux__) || defined(__ANDROID__)
|
||||
#include <cerrno>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
MemoryArena::MemoryArena() = default;
|
||||
|
||||
// Releases the OS object backing the arena (file mapping handle on Windows,
// shared memory descriptor on Linux). Views are not unmapped here.
MemoryArena::~MemoryArena()
{
#if defined(WIN32)
  if (m_file_handle)
    CloseHandle(m_file_handle);
#elif defined(__linux__)
  // The "not created" sentinel is -1, so every non-negative value -- including
  // descriptor 0 -- is a descriptor we own and must close.
  if (m_shmem_fd >= 0)
    close(m_shmem_fd);
#endif
}
|
||||
|
||||
void* MemoryArena::FindBaseAddressForMapping(size_t size)
|
||||
{
|
||||
void* base_address;
|
||||
#if defined(WIN32)
|
||||
base_address = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);
|
||||
if (base_address)
|
||||
VirtualFree(base_address, 0, MEM_RELEASE);
|
||||
#elif defined(__linux__)
|
||||
base_address = mmap(nullptr, size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
if (base_address)
|
||||
munmap(base_address, size);
|
||||
#elif defined(__ANDROID__)
|
||||
base_address = mmap(nullptr, size, PROT_NONE, MAP_ANON | MAP_SHARED, -1, 0);
|
||||
if (base_address)
|
||||
munmap(base_address, size);
|
||||
#else
|
||||
base_address = nullptr;
|
||||
#endif
|
||||
|
||||
if (!base_address)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to get base address for memory mapping of size %zu", size);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return base_address;
|
||||
}
|
||||
|
||||
bool MemoryArena::Create(size_t size, bool writable, bool executable)
|
||||
{
|
||||
#if defined(WIN32)
|
||||
const std::string file_mapping_name =
|
||||
StringUtil::StdStringFromFormat("common_memory_arena_%zu_%u", size, GetCurrentProcessId());
|
||||
|
||||
const DWORD protect = (writable ? (executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE) : PAGE_READONLY);
|
||||
m_file_handle = CreateFileMappingA(INVALID_HANDLE_VALUE, nullptr, protect, Truncate32(size >> 32), Truncate32(size),
|
||||
file_mapping_name.c_str());
|
||||
if (!m_file_handle)
|
||||
{
|
||||
Log_ErrorPrintf("CreateFileMapping failed: %u", GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#elif defined(__linux__)
|
||||
const std::string file_mapping_name =
|
||||
StringUtil::StdStringFromFormat("common_memory_arena_%zu_%u", size, static_cast<unsigned>(getpid()));
|
||||
m_shmem_fd = shm_open(file_mapping_name.c_str(), O_CREAT | O_EXCL | (writable ? O_RDWR : O_RDONLY), 0600);
|
||||
if (m_shmem_fd < 0)
|
||||
{
|
||||
Log_ErrorPrintf("shm_open failed: %d", errno);
|
||||
return false;
|
||||
}
|
||||
|
||||
// we're not going to be opening this mapping in other processes, so remove the file
|
||||
shm_unlink(file_mapping_name.c_str());
|
||||
|
||||
// ensure it's the correct size
|
||||
if (ftruncate64(m_shmem_fd, static_cast<off64_t>(size)) < 0)
|
||||
{
|
||||
Log_ErrorPrintf("ftruncate64(%zu) failed: %d", size, errno);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::optional<MemoryArena::View> MemoryArena::CreateView(size_t offset, size_t size, bool writable, bool executable,
|
||||
void* fixed_address)
|
||||
{
|
||||
void* base_pointer = CreateViewPtr(offset, size, writable, executable, fixed_address);
|
||||
if (!base_pointer)
|
||||
return std::nullopt;
|
||||
|
||||
return View(this, base_pointer, offset, size, writable);
|
||||
}
|
||||
|
||||
void* MemoryArena::CreateViewPtr(size_t offset, size_t size, bool writable, bool executable,
|
||||
void* fixed_address /*= nullptr*/)
|
||||
{
|
||||
void* base_pointer;
|
||||
#if defined(WIN32)
|
||||
const DWORD desired_access = FILE_MAP_READ | (writable ? FILE_MAP_WRITE : 0) | (executable ? FILE_MAP_EXECUTE : 0);
|
||||
base_pointer =
|
||||
MapViewOfFileEx(m_file_handle, desired_access, Truncate32(offset >> 32), Truncate32(offset), size, fixed_address);
|
||||
if (!base_pointer)
|
||||
return nullptr;
|
||||
#elif defined(__linux__)
|
||||
const int flags = (fixed_address != nullptr) ? (MAP_SHARED | MAP_FIXED) : MAP_SHARED;
|
||||
const int prot = PROT_READ | (writable ? PROT_WRITE : 0) | (executable ? PROT_EXEC : 0);
|
||||
base_pointer = mmap64(fixed_address, size, prot, flags, m_shmem_fd, static_cast<off64_t>(offset));
|
||||
if (base_pointer == reinterpret_cast<void*>(-1))
|
||||
return nullptr;
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
|
||||
m_num_views.fetch_add(1);
|
||||
return base_pointer;
|
||||
}
|
||||
|
||||
// Asks the OS to flush the given mapped range. Returns true when the
// underlying call succeeds; always false on unsupported platforms.
bool MemoryArena::FlushViewPtr(void* address, size_t size)
{
#if defined(WIN32)
  const BOOL flushed = FlushViewOfFile(address, size);
  return (flushed != FALSE);
#elif defined(__linux__)
  const int rc = msync(address, size, 0);
  return (rc >= 0);
#else
  return false;
#endif
}
|
||||
|
||||
bool MemoryArena::ReleaseViewPtr(void* address, size_t size)
|
||||
{
|
||||
bool result;
|
||||
#if defined(WIN32)
|
||||
result = static_cast<bool>(UnmapViewOfFile(address));
|
||||
#elif defined(__linux__)
|
||||
result = (munmap(address, size) >= 0);
|
||||
#else
|
||||
result = false;
|
||||
#endif
|
||||
|
||||
if (!result)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to unmap previously-created view at %p", address);
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t prev_count = m_num_views.fetch_sub(1);
|
||||
Assert(prev_count > 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Changes the access protection of the pages covering [address, address + length).
// Returns true if the OS accepted the change; always false on unsupported platforms.
bool MemoryArena::SetPageProtection(void* address, size_t length, bool readable, bool writable, bool executable)
{
#if defined(WIN32)
  // Indexed as [readable][writable][executable].
  static constexpr DWORD protection_table[2][2][2] = {
    {{PAGE_NOACCESS, PAGE_EXECUTE}, {PAGE_WRITECOPY, PAGE_EXECUTE_WRITECOPY}},
    {{PAGE_READONLY, PAGE_EXECUTE_READ}, {PAGE_READWRITE, PAGE_EXECUTE_READWRITE}}};

  const DWORD new_protect = protection_table[readable][writable][executable];
  DWORD previous_protect;
  const BOOL changed = VirtualProtect(address, length, new_protect, &previous_protect);
  return static_cast<bool>(changed);
#elif defined(__linux__) || defined(__ANDROID__)
  int protection = 0;
  if (readable)
    protection |= PROT_READ;
  if (writable)
    protection |= PROT_WRITE;
  if (executable)
    protection |= PROT_EXEC;

  return (mprotect(address, length, protection) >= 0);
#else
  return false;
#endif
}
|
||||
|
||||
MemoryArena::View::View(MemoryArena* parent, void* base_pointer, size_t arena_offset, size_t mapping_size,
|
||||
bool writable)
|
||||
: m_parent(parent), m_base_pointer(base_pointer), m_arena_offset(arena_offset), m_mapping_size(mapping_size),
|
||||
m_writable(writable)
|
||||
{
|
||||
}
|
||||
|
||||
// Move constructor: takes over the mapping owned by `view` and leaves `view`
// empty (null parent), so its destructor does nothing.
MemoryArena::View::View(View&& view)
  : m_parent(view.m_parent), m_base_pointer(view.m_base_pointer), m_arena_offset(view.m_arena_offset),
    m_mapping_size(view.m_mapping_size),
    // The destructor reads m_writable to decide whether to flush, so it must be carried over too.
    m_writable(view.m_writable)
{
  view.m_parent = nullptr;
  view.m_base_pointer = nullptr;
  view.m_arena_offset = 0;
  view.m_mapping_size = 0;
  view.m_writable = false;
}
|
||||
|
||||
// Flushes (writable views only) and unmaps the mapping. A moved-from view has
// no parent and is skipped. Failure of either step is treated as fatal.
MemoryArena::View::~View()
{
  if (!m_parent)
    return;

  if (m_writable)
  {
    const bool flushed = m_parent->FlushViewPtr(m_base_pointer, m_mapping_size);
    if (!flushed)
    {
      Panic("Failed to flush previously-created view");
    }
  }

  const bool released = m_parent->ReleaseViewPtr(m_base_pointer, m_mapping_size);
  if (!released)
  {
    Panic("Failed to unmap previously-created view");
  }
}
|
||||
} // namespace Common
|
|
@ -0,0 +1,58 @@
|
|||
#pragma once
|
||||
#include "types.h"
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
|
||||
namespace Common {
|
||||
// A block of shared memory that can be mapped into the address space several
// times ("views"), at OS-chosen or caller-chosen addresses.
//
// The arena owns an OS handle/descriptor that is closed in its destructor, so
// it is neither copyable nor assignable.
class MemoryArena
{
public:
  // Owning handle for a single mapping of the arena. The mapping is flushed
  // (if writable) and unmapped when the view is destroyed. Move-only.
  class View
  {
  public:
    View(MemoryArena* parent, void* base_pointer, size_t arena_offset, size_t mapping_size, bool writable);
    View(View&& view);
    ~View();

    // A view owns its mapping; copying it would unmap the same range twice.
    View(const View&) = delete;
    View& operator=(const View&) = delete;
    View& operator=(View&&) = delete;

    // Address the arena range is mapped at.
    void* GetBasePointer() const { return m_base_pointer; }
    // Offset of the mapped range inside the arena.
    size_t GetArenaOffset() const { return m_arena_offset; }
    // Length of the mapped range in bytes.
    size_t GetMappingSize() const { return m_mapping_size; }
    // Whether the view was created with write access.
    bool IsWritable() const { return m_writable; }

  private:
    MemoryArena* m_parent;
    void* m_base_pointer;
    size_t m_arena_offset;
    size_t m_mapping_size;
    bool m_writable;
  };

  MemoryArena();
  ~MemoryArena();

  // The arena owns the backing OS object; duplicating it would close it twice.
  MemoryArena(const MemoryArena&) = delete;
  MemoryArena& operator=(const MemoryArena&) = delete;

  // Probes for `size` bytes of free address space; returns nullptr on failure.
  static void* FindBaseAddressForMapping(size_t size);

  // Creates the backing shared memory object. Returns false on failure.
  bool Create(size_t size, bool writable, bool executable);

  // Maps part of the arena and returns an owning View, or std::nullopt on failure.
  std::optional<View> CreateView(size_t offset, size_t size, bool writable, bool executable,
                                 void* fixed_address = nullptr);

  // Raw-pointer variants: the caller must pair CreateViewPtr() with ReleaseViewPtr().
  void* CreateViewPtr(size_t offset, size_t size, bool writable, bool executable, void* fixed_address = nullptr);
  bool FlushViewPtr(void* address, size_t size);
  bool ReleaseViewPtr(void* address, size_t size);

  // Changes the access protection of an already-mapped range.
  static bool SetPageProtection(void* address, size_t length, bool readable, bool writable, bool executable);

private:
#if defined(WIN32)
  void* m_file_handle = nullptr;
#elif defined(__linux__)
  int m_shmem_fd = -1;
#endif

  // Number of views currently mapped through CreateViewPtr().
  std::atomic_size_t m_num_views{0};
  size_t m_size = 0;
  bool m_writable = false;
  bool m_executable = false;
};
|
||||
} // namespace Common
|
|
@ -0,0 +1,186 @@
|
|||
#include "page_fault_handler.h"
|
||||
#include "common/log.h"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
Log_SetChannel(Common::PageFaultHandler);
|
||||
|
||||
#if defined(WIN32)
|
||||
#include "common/windows_headers.h"
|
||||
#elif defined(__linux__) || defined(__ANDROID__)
|
||||
#include <signal.h>
|
||||
#include <ucontext.h>
|
||||
#include <unistd.h>
|
||||
#define USE_SIGSEGV 1
|
||||
#endif
|
||||
|
||||
namespace Common::PageFaultHandler {
|
||||
|
||||
struct RegisteredHandler
|
||||
{
|
||||
void* owner;
|
||||
Callback callback;
|
||||
};
|
||||
static std::vector<RegisteredHandler> m_handlers;
|
||||
static std::mutex m_handler_lock;
|
||||
static thread_local bool s_in_handler;
|
||||
|
||||
#if defined(WIN32)
|
||||
static PVOID s_veh_handle;
|
||||
|
||||
// Vectored exception handler: offers access violations to each registered
// callback in turn. Execution resumes as soon as one callback reports the
// fault as handled; otherwise the exception is passed on to the next handler.
static LONG ExceptionHandler(PEXCEPTION_POINTERS exi)
{
  const auto* const record = exi->ExceptionRecord;

  // Ignore everything but access violations, and faults raised while already dispatching on this thread.
  if (record->ExceptionCode != EXCEPTION_ACCESS_VIOLATION || s_in_handler)
    return EXCEPTION_CONTINUE_SEARCH;

  s_in_handler = true;

  void* const fault_pc = reinterpret_cast<void*>(exi->ContextRecord->Rip);
  void* const fault_address = reinterpret_cast<void*>(record->ExceptionInformation[1]);
  const bool fault_is_write = (record->ExceptionInformation[0] == 1);

  LONG disposition = EXCEPTION_CONTINUE_SEARCH;

  std::lock_guard<std::mutex> guard(m_handler_lock);
  for (const RegisteredHandler& entry : m_handlers)
  {
    if (entry.callback(fault_pc, fault_address, fault_is_write) == HandlerResult::ContinueExecution)
    {
      disposition = EXCEPTION_CONTINUE_EXECUTION;
      break;
    }
  }

  s_in_handler = false;
  return disposition;
}
|
||||
|
||||
#elif defined(USE_SIGSEGV)
|
||||
|
||||
static struct sigaction s_old_sigsegv_action;
|
||||
|
||||
static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
|
||||
{
|
||||
if ((info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR) || s_in_handler)
|
||||
return;
|
||||
|
||||
void* const exception_address = reinterpret_cast<void*>(info->si_addr);
|
||||
|
||||
#if defined(__x86_64__)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
|
||||
const bool is_write = (static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_ERR] & 2) != 0;
|
||||
#elif defined(__aarch64__)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
|
||||
const bool is_write = false;
|
||||
#else
|
||||
void* const exception_pc = nullptr;
|
||||
const bool is_write = false;
|
||||
#endif
|
||||
|
||||
std::lock_guard<std::mutex> guard(m_handler_lock);
|
||||
for (const RegisteredHandler& rh : m_handlers)
|
||||
{
|
||||
if (rh.callback(exception_pc, exception_address, is_write) == HandlerResult::ContinueExecution)
|
||||
{
|
||||
s_in_handler = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// call old signal handler
|
||||
if (s_old_sigsegv_action.sa_flags & SA_SIGINFO)
|
||||
s_old_sigsegv_action.sa_sigaction(sig, info, ctx);
|
||||
else if (s_old_sigsegv_action.sa_handler == SIG_DFL)
|
||||
signal(sig, SIG_DFL);
|
||||
else if (s_old_sigsegv_action.sa_handler == SIG_IGN)
|
||||
return;
|
||||
else
|
||||
s_old_sigsegv_action.sa_handler(sig);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool InstallHandler(void* owner, Callback callback)
|
||||
{
|
||||
bool was_empty;
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_handler_lock);
|
||||
if (std::find_if(m_handlers.begin(), m_handlers.end(),
|
||||
[owner](const RegisteredHandler& rh) { return rh.owner == owner; }) != m_handlers.end())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
was_empty = m_handlers.empty();
|
||||
m_handlers.push_back(RegisteredHandler{owner, std::move(callback)});
|
||||
}
|
||||
|
||||
if (was_empty)
|
||||
{
|
||||
#if defined(WIN32)
|
||||
s_veh_handle = AddVectoredExceptionHandler(1, ExceptionHandler);
|
||||
if (!s_veh_handle)
|
||||
{
|
||||
Log_ErrorPrint("Failed to add vectored exception handler");
|
||||
return false;
|
||||
}
|
||||
#elif defined(USE_SIGSEGV)
|
||||
#if 0
|
||||
// TODO: Is this needed?
|
||||
stack_t signal_stack = {};
|
||||
signal_stack.ss_sp = malloc(SIGSTKSZ);
|
||||
signal_stack.ss_size = SIGSTKSZ;
|
||||
if (sigaltstack(&signal_stack, nullptr))
|
||||
{
|
||||
Log_ErrorPrintf("signaltstack() failed: %d", errno);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct sigaction sa = {};
|
||||
sa.sa_sigaction = SIGSEGVHandler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
if (sigaction(SIGSEGV, &sa, &s_old_sigsegv_action) < 0)
|
||||
{
|
||||
Log_ErrorPrintf("sigaction() failed: %d", errno);
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RemoveHandler(void* owner)
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_handler_lock);
|
||||
auto it = std::find_if(m_handlers.begin(), m_handlers.end(),
|
||||
[owner](const RegisteredHandler& rh) { return rh.owner == owner; });
|
||||
if (it == m_handlers.end())
|
||||
return false;
|
||||
|
||||
m_handlers.erase(it);
|
||||
|
||||
if (m_handlers.empty())
|
||||
{
|
||||
#if defined(WIN32)
|
||||
RemoveVectoredExceptionHandler(s_veh_handle);
|
||||
s_veh_handle = nullptr;
|
||||
#else
|
||||
// restore old signal handler
|
||||
if (sigaction(SIGSEGV, &s_old_sigsegv_action, nullptr) < 0)
|
||||
{
|
||||
Log_ErrorPrintf("sigaction() failed: %d", errno);
|
||||
return false;
|
||||
}
|
||||
|
||||
s_old_sigsegv_action = {};
|
||||
#endif
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Common::PageFaultHandler
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
namespace Common::PageFaultHandler {
|
||||
// Value returned by a page fault callback to steer dispatch.
enum class HandlerResult
{
  // The callback dealt with the fault; stop dispatching and resume the faulting code.
  ContinueExecution,

  // The callback did not handle the fault; offer it to the next registered callback.
  ExecuteNextHandler,
};
|
||||
|
||||
using Callback = HandlerResult(*)(void* exception_pc, void* fault_address, bool is_write);
|
||||
using Handle = void*;
|
||||
|
||||
bool InstallHandler(void* owner, Callback callback);
|
||||
bool RemoveHandler(void* owner);
|
||||
|
||||
} // namespace Common::PageFaultHandler
|
324
src/core/bus.cpp
324
src/core/bus.cpp
|
@ -10,6 +10,7 @@
|
|||
#include "cpu_disasm.h"
|
||||
#include "dma.h"
|
||||
#include "gpu.h"
|
||||
#include "host_interface.h"
|
||||
#include "interrupt_controller.h"
|
||||
#include "mdec.h"
|
||||
#include "pad.h"
|
||||
|
@ -69,8 +70,9 @@ union MEMCTRL
|
|||
};
|
||||
|
||||
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
|
||||
u8 g_ram[RAM_SIZE]{}; // 2MB RAM
|
||||
u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
|
||||
u8* g_ram = nullptr; // 2MB RAM
|
||||
u8* g_bios = nullptr; // 512K BIOS ROM
|
||||
u8* g_scratchpad = nullptr;
|
||||
|
||||
static std::array<TickCount, 3> m_exp1_access_time = {};
|
||||
static std::array<TickCount, 3> m_exp2_access_time = {};
|
||||
|
@ -85,9 +87,19 @@ static u32 m_ram_size_reg = 0;
|
|||
|
||||
static std::string m_tty_line_buffer;
|
||||
|
||||
static Common::MemoryArena m_memory_arena;
|
||||
static u8* m_fastmem_base = nullptr;
|
||||
static std::vector<Common::MemoryArena::View> m_fastmem_ram_views;
|
||||
static std::vector<Common::MemoryArena::View> m_fastmem_scratchpad_views;
|
||||
static std::vector<Common::MemoryArena::View> m_fastmem_bios_views;
|
||||
|
||||
static std::tuple<TickCount, TickCount, TickCount> CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay);
|
||||
static void RecalculateMemoryTimings();
|
||||
|
||||
static void SetCodePageFastmemProtection(u32 page_index, bool writable);
|
||||
static bool AllocateMemory();
|
||||
static void UnmapFastmemViews();
|
||||
|
||||
#define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3))
|
||||
#define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u))
|
||||
#define FIXUP_HALFWORD_READ_OFFSET(offset) ((offset) & ~u32(1))
|
||||
|
@ -103,19 +115,35 @@ ALWAYS_INLINE static void FixupUnalignedWordAccessW32(u32& offset, u32& value)
|
|||
value <<= byte_offset * 8;
|
||||
}
|
||||
|
||||
void Initialize()
|
||||
bool Initialize()
|
||||
{
|
||||
if (!AllocateMemory())
|
||||
{
|
||||
g_host_interface->ReportError("Failed to allocate memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
{
|
||||
//
|
||||
UnmapFastmemViews();
|
||||
if (g_ram)
|
||||
m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE);
|
||||
if (g_bios)
|
||||
m_memory_arena.ReleaseViewPtr(g_bios, BIOS_SIZE);
|
||||
if (g_scratchpad)
|
||||
m_memory_arena.ReleaseViewPtr(g_scratchpad, FASTMEM_SCRATCHPAD_SIZE);
|
||||
|
||||
CPU::g_state.fastmem_base = nullptr;
|
||||
}
|
||||
|
||||
void Reset()
|
||||
{
|
||||
std::memset(g_ram, 0, sizeof(g_ram));
|
||||
std::memset(g_ram, 0, RAM_SIZE);
|
||||
std::memset(g_scratchpad, 0, SCRATCHPAD_SIZE);
|
||||
m_MEMCTRL.exp1_base = 0x1F000000;
|
||||
m_MEMCTRL.exp2_base = 0x1F802000;
|
||||
m_MEMCTRL.exp1_delay_size.bits = 0x0013243F;
|
||||
|
@ -137,8 +165,9 @@ bool DoState(StateWrapper& sw)
|
|||
sw.Do(&m_bios_access_time);
|
||||
sw.Do(&m_cdrom_access_time);
|
||||
sw.Do(&m_spu_access_time);
|
||||
sw.DoBytes(g_ram, sizeof(g_ram));
|
||||
sw.DoBytes(g_bios, sizeof(g_bios));
|
||||
sw.DoBytes(g_ram, RAM_SIZE);
|
||||
sw.DoBytes(g_bios, BIOS_SIZE);
|
||||
sw.DoBytes(g_scratchpad, SCRATCHPAD_SIZE);
|
||||
sw.DoArray(m_MEMCTRL.regs, countof(m_MEMCTRL.regs));
|
||||
sw.Do(&m_ram_size_reg);
|
||||
sw.Do(&m_tty_line_buffer);
|
||||
|
@ -217,6 +246,209 @@ void RecalculateMemoryTimings()
|
|||
m_spu_access_time[2] + 1);
|
||||
}
|
||||
|
||||
bool AllocateMemory()
|
||||
{
|
||||
if (!m_memory_arena.Create(MEMORY_ARENA_SIZE, true, false))
|
||||
{
|
||||
Log_ErrorPrint("Failed to create memory arena");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the base views.
|
||||
g_ram = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false));
|
||||
g_bios = static_cast<u8*>(m_memory_arena.CreateViewPtr(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, true, false));
|
||||
g_scratchpad = static_cast<u8*>(
|
||||
m_memory_arena.CreateViewPtr(MEMORY_ARENA_SCRATCHPAD_OFFSET, FASTMEM_SCRATCHPAD_SIZE, true, false));
|
||||
if (!g_ram || !g_bios)
|
||||
{
|
||||
Log_ErrorPrint("Failed to create base views of memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void UnmapFastmemViews()
|
||||
{
|
||||
m_fastmem_ram_views.clear();
|
||||
m_fastmem_scratchpad_views.clear();
|
||||
m_fastmem_bios_views.clear();
|
||||
}
|
||||
|
||||
void UpdateFastmemViews(bool enabled, bool isolate_cache)
|
||||
{
|
||||
UnmapFastmemViews();
|
||||
if (!enabled)
|
||||
{
|
||||
m_fastmem_base = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false");
|
||||
if (!m_fastmem_base)
|
||||
{
|
||||
m_fastmem_base = static_cast<u8*>(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE));
|
||||
if (!m_fastmem_base)
|
||||
{
|
||||
Log_ErrorPrint("Failed to find base address for fastmem");
|
||||
return;
|
||||
}
|
||||
|
||||
Log_InfoPrintf("Fastmem base: %p", m_fastmem_base);
|
||||
CPU::g_state.fastmem_base = m_fastmem_base;
|
||||
}
|
||||
|
||||
auto MapRAM = [](u32 base_address) {
|
||||
u8* map_address = m_fastmem_base + base_address;
|
||||
auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, true, false, map_address);
|
||||
if (!view)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
// mark all pages with code as non-writable
|
||||
for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++)
|
||||
{
|
||||
if (m_ram_code_bits[i])
|
||||
{
|
||||
u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE);
|
||||
if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false))
|
||||
Log_ErrorPrintf("Failed to write-protect code page at %p");
|
||||
}
|
||||
}
|
||||
|
||||
m_fastmem_ram_views.push_back(std::move(view.value()));
|
||||
};
|
||||
auto MapScratchpad = [](u32 base_address) {
|
||||
u8* map_address = m_fastmem_base + base_address;
|
||||
auto view =
|
||||
m_memory_arena.CreateView(MEMORY_ARENA_SCRATCHPAD_OFFSET, FASTMEM_SCRATCHPAD_SIZE, true, false, map_address);
|
||||
if (!view)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map scratchpad at fastmem area %p (offset 0x%08X)", map_address,
|
||||
FASTMEM_SCRATCHPAD_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
// mark all pages beyond the first as inaccessible
|
||||
// we need to do this because of windows's stupidity with its 64K mapping granularity
|
||||
if (!m_memory_arena.SetPageProtection(map_address + CPU_CODE_CACHE_PAGE_SIZE,
|
||||
FASTMEM_SCRATCHPAD_SIZE - CPU_CODE_CACHE_PAGE_SIZE, false, false, false))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to read/write protect scratchpad");
|
||||
}
|
||||
|
||||
m_fastmem_scratchpad_views.push_back(std::move(view.value()));
|
||||
};
|
||||
auto MapBIOS = [](u32 base_address) {
|
||||
u8* map_address = m_fastmem_base + base_address;
|
||||
auto view = m_memory_arena.CreateView(MEMORY_ARENA_BIOS_OFFSET, BIOS_SIZE, false, false, map_address);
|
||||
if (!view)
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map BIOS at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
m_fastmem_bios_views.push_back(std::move(view.value()));
|
||||
};
|
||||
|
||||
if (!isolate_cache)
|
||||
{
|
||||
// KUSEG - cached
|
||||
MapRAM(0x00000000);
|
||||
// MapScratchpad(0x1F800000);
|
||||
// MapBIOS(0x1FC00000);
|
||||
|
||||
// KSEG0 - cached
|
||||
MapRAM(0x80000000);
|
||||
// MapScratchpad(0x9F800000);
|
||||
// MapBIOS(0x9FC00000);
|
||||
}
|
||||
|
||||
// KSEG1 - uncached
|
||||
MapRAM(0xA0000000);
|
||||
// MapBIOS(0xBFC00000);
|
||||
}
|
||||
|
||||
// Returns whether the RAM page with the given index is flagged as containing code.
bool IsRAMCodePage(u32 index)
{
  const bool is_code_page = m_ram_code_bits[index];
  return is_code_page;
}
|
||||
|
||||
// Flags a RAM page as containing code and write-protects it in every fastmem
// RAM view. Does nothing if the page is already flagged.
void SetRAMCodePage(u32 index)
{
  if (!m_ram_code_bits[index])
  {
    m_ram_code_bits[index] = true;

    // protect fastmem pages
    SetCodePageFastmemProtection(index, false);
  }
}
|
||||
|
||||
// Removes the code flag from a RAM page and makes it writable again in every
// fastmem RAM view. Does nothing if the page is not flagged.
void ClearRAMCodePage(u32 index)
{
  if (m_ram_code_bits[index])
  {
    m_ram_code_bits[index] = false;

    // unprotect fastmem pages
    SetCodePageFastmemProtection(index, true);
  }
}
|
||||
|
||||
// Applies read-only (writable == false) or read-write (writable == true)
// protection to one code page in every fastmem RAM view. Failures are logged
// and the remaining views are still processed.
void SetCodePageFastmemProtection(u32 page_index, bool writable)
{
  const auto page_offset = page_index * CPU_CODE_CACHE_PAGE_SIZE;

  for (const auto& ram_view : m_fastmem_ram_views)
  {
    u8* const page_address = static_cast<u8*>(ram_view.GetBasePointer()) + page_offset;
    const bool changed =
      m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false);
    if (changed)
      continue;

    Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index,
                    page_offset, page_address);
  }
}
|
||||
|
||||
void ClearRAMCodePageFlags()
|
||||
{
|
||||
m_ram_code_bits.reset();
|
||||
|
||||
// unprotect fastmem pages
|
||||
for (const auto& view : m_fastmem_ram_views)
|
||||
{
|
||||
if (!m_memory_arena.SetPageProtection(view.GetBasePointer(), view.GetMappingSize(), true, true, false))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns whether `address` lies in a RAM page flagged as containing code.
// Addresses outside RAM are never code pages.
bool IsCodePageAddress(PhysicalMemoryAddress address)
{
  if (!IsRAMAddress(address))
    return false;

  return m_ram_code_bits[(address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE];
}
|
||||
|
||||
// Returns whether any RAM page touched by [start_address, start_address + size)
// is flagged as containing code. Ranges that do not start in RAM, and empty
// ranges, never contain code pages.
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size)
{
  if (!IsRAMAddress(start_address))
    return false;

  if (size == 0)
    return false;

  const u32 first_offset = (start_address & RAM_MASK);

  // Work in page indices instead of stepping a byte address by the page size:
  // with an unaligned start the latter can step over the page holding the
  // final bytes of the range. 64-bit math keeps the end from wrapping.
  const u64 last_offset = static_cast<u64>(first_offset) + size - 1;
  const u32 first_page = first_offset / CPU_CODE_CACHE_PAGE_SIZE;
  u64 last_page = last_offset / CPU_CODE_CACHE_PAGE_SIZE;

  // Ranges running past the end of RAM are clamped to the last tracked page
  // rather than indexing the bitset out of bounds.
  if (last_page >= CPU_CODE_CACHE_PAGE_COUNT)
    last_page = CPU_CODE_CACHE_PAGE_COUNT - 1;

  for (u64 page = first_page; page <= last_page; page++)
  {
    if (m_ram_code_bits[static_cast<size_t>(page)])
      return true;
  }

  return false;
}
|
||||
|
||||
static TickCount DoInvalidAccess(MemoryAccessType type, MemoryAccessSize size, PhysicalMemoryAddress address,
|
||||
u32& value)
|
||||
{
|
||||
|
@ -288,7 +520,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value)
|
|||
}
|
||||
}
|
||||
|
||||
return (type == MemoryAccessType::Read) ? 4 : 0;
|
||||
return (type == MemoryAccessType::Read) ? RAM_READ_TICKS : 0;
|
||||
}
|
||||
|
||||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
|
@ -753,7 +985,7 @@ ALWAYS_INLINE_RELEASE void DoInstructionRead(PhysicalMemoryAddress address, void
|
|||
{
|
||||
std::memcpy(data, &g_ram[address & RAM_MASK], sizeof(u32) * word_count);
|
||||
if constexpr (add_ticks)
|
||||
g_state.pending_ticks += (icache_read ? 1 : 4) * word_count;
|
||||
g_state.pending_ticks += (icache_read ? 1 : RAM_READ_TICKS) * word_count;
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
{
|
||||
|
@ -776,7 +1008,7 @@ TickCount GetInstructionReadTicks(VirtualMemoryAddress address)
|
|||
|
||||
if (address < RAM_MIRROR_END)
|
||||
{
|
||||
return 4;
|
||||
return RAM_READ_TICKS;
|
||||
}
|
||||
else if (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE))
|
||||
{
|
||||
|
@ -894,34 +1126,36 @@ static void WriteCacheControl(u32 value)
|
|||
template<MemoryAccessType type, MemoryAccessSize size>
|
||||
ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address, u32& value)
|
||||
{
|
||||
using namespace Bus;
|
||||
|
||||
const PhysicalMemoryAddress cache_offset = address & DCACHE_OFFSET_MASK;
|
||||
if constexpr (size == MemoryAccessSize::Byte)
|
||||
{
|
||||
if constexpr (type == MemoryAccessType::Read)
|
||||
value = ZeroExtend32(g_state.dcache[cache_offset]);
|
||||
value = ZeroExtend32(g_scratchpad[cache_offset]);
|
||||
else
|
||||
g_state.dcache[cache_offset] = Truncate8(value);
|
||||
g_scratchpad[cache_offset] = Truncate8(value);
|
||||
}
|
||||
else if constexpr (size == MemoryAccessSize::HalfWord)
|
||||
{
|
||||
if constexpr (type == MemoryAccessType::Read)
|
||||
{
|
||||
u16 temp;
|
||||
std::memcpy(&temp, &g_state.dcache[cache_offset], sizeof(temp));
|
||||
std::memcpy(&temp, &g_scratchpad[cache_offset], sizeof(temp));
|
||||
value = ZeroExtend32(temp);
|
||||
}
|
||||
else
|
||||
{
|
||||
u16 temp = Truncate16(value);
|
||||
std::memcpy(&g_state.dcache[cache_offset], &temp, sizeof(temp));
|
||||
std::memcpy(&g_scratchpad[cache_offset], &temp, sizeof(temp));
|
||||
}
|
||||
}
|
||||
else if constexpr (size == MemoryAccessSize::Word)
|
||||
{
|
||||
if constexpr (type == MemoryAccessType::Read)
|
||||
std::memcpy(&value, &g_state.dcache[cache_offset], sizeof(value));
|
||||
std::memcpy(&value, &g_scratchpad[cache_offset], sizeof(value));
|
||||
else
|
||||
std::memcpy(&g_state.dcache[cache_offset], &value, sizeof(value));
|
||||
std::memcpy(&g_scratchpad[cache_offset], &value, sizeof(value));
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1307,6 +1541,64 @@ bool SafeWriteMemoryWord(VirtualMemoryAddress addr, u32 value)
|
|||
return DoMemoryAccess<MemoryAccessType::Write, MemoryAccessSize::Word>(addr, value) >= 0;
|
||||
}
|
||||
|
||||
// Returns a host pointer through which `address` can be read directly, or
// nullptr if the address is not backed by RAM, scratchpad or BIOS (or lies
// outside segments 0, 4 and 5). When `read_ticks` is non-null it receives the
// tick cost of the read for the region that was hit.
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks)
{
  using namespace Bus;

  // Only the top-three-bit segments 0, 4 and 5 are eligible for direct access.
  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  const bool in_ram = (paddr < RAM_MIRROR_END);
  if (in_ram)
  {
    if (read_ticks != nullptr)
      *read_ticks = RAM_READ_TICKS;

    return &g_ram[paddr & RAM_MASK];
  }

  const bool in_scratchpad = ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  if (in_scratchpad)
  {
    if (read_ticks != nullptr)
      *read_ticks = 0;

    return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
  }

  const bool in_bios = (paddr >= BIOS_BASE && paddr < (BIOS_BASE + BIOS_SIZE));
  if (in_bios)
  {
    if (read_ticks != nullptr)
      *read_ticks = m_bios_access_time[static_cast<u32>(size)];

    return &g_bios[paddr & BIOS_MASK];
  }

  return nullptr;
}
|
||||
|
||||
// Returns a host pointer through which `address` can be written directly, or
// nullptr if direct writes are not possible there. Only the scratchpad is
// currently eligible; the RAM path is compiled out.
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size)
{
  using namespace Bus;

  // Only the top-three-bit segments 0, 4 and 5 are eligible for direct access.
  switch (address >> 29)
  {
    case 0:
    case 4:
    case 5:
      break;

    default:
      return nullptr;
  }

  const PhysicalMemoryAddress paddr = address & PHYSICAL_MEMORY_ADDRESS_MASK;

#if 0
  // Not enabled until we can protect code regions.
  if (paddr < RAM_MIRROR_END)
    return &g_ram[paddr & RAM_MASK];
#endif

  const bool in_scratchpad = ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION);
  if (!in_scratchpad)
    return nullptr;

  return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
}
|
||||
|
||||
namespace Recompiler::Thunks {
|
||||
|
||||
u64 ReadMemoryByte(u32 address)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#pragma once
|
||||
#include "common/bitfield.h"
|
||||
#include "cpu_code_cache.h"
|
||||
#include "common/memory_arena.h"
|
||||
#include "types.h"
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
|
@ -20,6 +20,9 @@ enum : u32
|
|||
EXP1_BASE = 0x1F000000,
|
||||
EXP1_SIZE = 0x800000,
|
||||
EXP1_MASK = EXP1_SIZE - 1,
|
||||
SCRATCHPAD_BASE = 0x1F800000,
|
||||
SCRATCHPAD_SIZE = 0x400,
|
||||
SCRATCHPAD_MASK = SCRATCHPAD_SIZE - 1,
|
||||
MEMCTRL_BASE = 0x1F801000,
|
||||
MEMCTRL_SIZE = 0x40,
|
||||
MEMCTRL_MASK = MEMCTRL_SIZE - 1,
|
||||
|
@ -66,26 +69,72 @@ enum : u32
|
|||
MEMCTRL_REG_COUNT = 9
|
||||
};
|
||||
|
||||
void Initialize();
|
||||
enum : TickCount
|
||||
{
|
||||
RAM_READ_TICKS = 4
|
||||
};
|
||||
|
||||
enum : size_t
|
||||
{
|
||||
FASTMEM_SCRATCHPAD_SIZE = 0x10000,
|
||||
|
||||
// Our memory arena contains storage for RAM, the fastmem scratchpad and BIOS.
|
||||
MEMORY_ARENA_SIZE = RAM_SIZE + FASTMEM_SCRATCHPAD_SIZE + BIOS_SIZE,
|
||||
|
||||
// Offsets within the memory arena.
|
||||
MEMORY_ARENA_RAM_OFFSET = 0,
|
||||
MEMORY_ARENA_SCRATCHPAD_OFFSET = MEMORY_ARENA_RAM_OFFSET + RAM_SIZE,
|
||||
MEMORY_ARENA_BIOS_OFFSET = MEMORY_ARENA_SCRATCHPAD_OFFSET + FASTMEM_SCRATCHPAD_SIZE,
|
||||
|
||||
// Fastmem region size is 4GB to cover the entire 32-bit address space.
|
||||
FASTMEM_REGION_SIZE = UINT64_C(0x100000000)
|
||||
};
|
||||
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
void Reset();
|
||||
bool DoState(StateWrapper& sw);
|
||||
|
||||
u8* GetFastmemBase();
|
||||
void UpdateFastmemViews(bool enabled, bool isolate_cache);
|
||||
|
||||
void SetExpansionROM(std::vector<u8> data);
|
||||
void SetBIOS(const std::vector<u8>& image);
|
||||
|
||||
extern std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits;
|
||||
extern u8 g_ram[RAM_SIZE]; // 2MB RAM
|
||||
extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM
|
||||
extern u8* g_ram; // 2MB RAM
|
||||
extern u8* g_bios; // 512K BIOS ROM
|
||||
extern u8* g_scratchpad; // 1KB scratchpad as 4K (in fastmem)
|
||||
|
||||
/// Returns true if the address specified is writable (RAM).
|
||||
ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address)
|
||||
{
|
||||
return address < RAM_MIRROR_END;
|
||||
}
|
||||
|
||||
/// Returns the code page index for a RAM address.
|
||||
ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address)
|
||||
{
|
||||
return (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
}
|
||||
|
||||
/// Returns true if the specified page contains code.
|
||||
bool IsRAMCodePage(u32 index);
|
||||
|
||||
/// Flags a RAM region as code, so we know when to invalidate blocks.
|
||||
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
|
||||
void SetRAMCodePage(u32 index);
|
||||
|
||||
/// Unflags a RAM region as code, the code cache will no longer be notified when writes occur.
|
||||
ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
|
||||
void ClearRAMCodePage(u32 index);
|
||||
|
||||
/// Clears all code bits for RAM regions.
|
||||
ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); }
|
||||
void ClearRAMCodePageFlags();
|
||||
|
||||
/// Returns true if the specified address is in a code page.
|
||||
bool IsCodePageAddress(PhysicalMemoryAddress address);
|
||||
|
||||
/// Returns true if the range specified overlaps with a code page.
|
||||
bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size);
|
||||
|
||||
/// Returns the number of cycles stolen by DMA RAM access.
|
||||
ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
|
||||
|
@ -97,16 +146,4 @@ ALWAYS_INLINE TickCount GetDMARAMTickCount(u32 word_count)
|
|||
return static_cast<TickCount>(word_count + ((word_count + 15) / 16));
|
||||
}
|
||||
|
||||
/// Invalidates any code pages which overlap the specified range.
|
||||
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
|
||||
{
|
||||
const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
const u32 end_page = (address + word_count * sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
for (u32 page = start_page; page <= end_page; page++)
|
||||
{
|
||||
if (m_ram_code_bits[page])
|
||||
CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Bus
|
|
@ -5,6 +5,7 @@
|
|||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_disasm.h"
|
||||
#include "settings.h"
|
||||
#include "system.h"
|
||||
#include "timing_event.h"
|
||||
Log_SetChannel(CPU::CodeCache);
|
||||
|
@ -35,14 +36,8 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
|
|||
|
||||
static JitCodeBuffer s_code_buffer;
|
||||
|
||||
enum : u32
|
||||
{
|
||||
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
|
||||
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
|
||||
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
|
||||
};
|
||||
|
||||
std::array<CodeBlock::HostCodePointer, FAST_MAP_TOTAL_SLOT_COUNT> s_fast_map;
|
||||
CodeBlock::HostCodePointer s_asm_dispatcher;
|
||||
|
||||
ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
|
||||
{
|
||||
|
@ -51,6 +46,7 @@ ALWAYS_INLINE static u32 GetFastMapIndex(u32 pc)
|
|||
((pc & Bus::RAM_MASK) >> 2);
|
||||
}
|
||||
|
||||
static void CompileDispatcher();
|
||||
static void FastCompileBlockFunction();
|
||||
|
||||
static void ResetFastMap()
|
||||
|
@ -66,6 +62,7 @@ static void SetFastMap(u32 pc, CodeBlock::HostCodePointer function)
|
|||
#endif
|
||||
|
||||
using BlockMap = std::unordered_map<u32, CodeBlock*>;
|
||||
using HostCodeMap = std::map<CodeBlock::HostCodePointer, CodeBlock*>;
|
||||
|
||||
void LogCurrentState();
|
||||
|
||||
|
@ -90,16 +87,26 @@ static void LinkBlock(CodeBlock* from, CodeBlock* to);
|
|||
/// Unlink all blocks which point to this block, and any that this block links to.
|
||||
static void UnlinkBlock(CodeBlock* block);
|
||||
|
||||
static bool s_use_recompiler = false;
|
||||
static BlockMap s_blocks;
|
||||
static std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
|
||||
|
||||
void Initialize(bool use_recompiler)
|
||||
#ifdef WITH_RECOMPILER
|
||||
static HostCodeMap s_host_code_map;
|
||||
|
||||
static void AddBlockToHostCodeMap(CodeBlock* block);
|
||||
static void RemoveBlockFromHostCodeMap(CodeBlock* block);
|
||||
static bool InitializeFastmem();
|
||||
static void ShutdownFastmem();
|
||||
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
|
||||
#endif
|
||||
|
||||
void Initialize()
|
||||
{
|
||||
Assert(s_blocks.empty());
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_use_recompiler = use_recompiler;
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
RECOMPILER_GUARD_SIZE))
|
||||
|
@ -110,15 +117,19 @@ void Initialize(bool use_recompiler)
|
|||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
Panic("Failed to initialize fastmem");
|
||||
|
||||
ResetFastMap();
|
||||
#else
|
||||
s_use_recompiler = false;
|
||||
CompileDispatcher();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
{
|
||||
Flush();
|
||||
ShutdownFastmem();
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_code_buffer.Destroy();
|
||||
#endif
|
||||
|
@ -131,18 +142,17 @@ static void ExecuteImpl()
|
|||
|
||||
while (!g_state.frame_done)
|
||||
{
|
||||
if (HasPendingInterrupt())
|
||||
{
|
||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
DispatchInterrupt();
|
||||
}
|
||||
|
||||
TimingEvents::UpdateCPUDowncount();
|
||||
|
||||
next_block_key = GetNextBlockKey();
|
||||
while (g_state.pending_ticks < g_state.downcount)
|
||||
{
|
||||
if (HasPendingInterrupt())
|
||||
{
|
||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
DispatchInterrupt();
|
||||
next_block_key = GetNextBlockKey();
|
||||
}
|
||||
|
||||
CodeBlock* block = LookupBlock(next_block_key);
|
||||
if (!block)
|
||||
{
|
||||
|
@ -152,6 +162,7 @@ static void ExecuteImpl()
|
|||
}
|
||||
|
||||
reexecute_block:
|
||||
Assert(!(HasPendingInterrupt()));
|
||||
|
||||
#if 0
|
||||
const u32 tick = TimingEvents::GetGlobalTickCounter() + CPU::GetPendingTicks();
|
||||
|
@ -170,7 +181,7 @@ static void ExecuteImpl()
|
|||
|
||||
if (g_state.pending_ticks >= g_state.downcount)
|
||||
break;
|
||||
else if (HasPendingInterrupt() || !USE_BLOCK_LINKING)
|
||||
else if (!USE_BLOCK_LINKING)
|
||||
continue;
|
||||
|
||||
next_block_key = GetNextBlockKey();
|
||||
|
@ -237,13 +248,21 @@ void Execute()
|
|||
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
void CompileDispatcher()
|
||||
{
|
||||
Recompiler::CodeGenerator cg(&s_code_buffer);
|
||||
s_asm_dispatcher = cg.CompileDispatcher();
|
||||
}
|
||||
|
||||
/// Returns a pointer to the first slot of the fast map table.
CodeBlock::HostCodePointer* GetFastMapPointer()
{
  CodeBlock::HostCodePointer* const first_slot = s_fast_map.data();
  return first_slot;
}
|
||||
|
||||
void ExecuteRecompiler()
|
||||
{
|
||||
#if 0
|
||||
while (!g_state.frame_done)
|
||||
{
|
||||
TimingEvents::UpdateCPUDowncount();
|
||||
|
||||
while (g_state.pending_ticks < g_state.downcount)
|
||||
{
|
||||
if (HasPendingInterrupt())
|
||||
{
|
||||
|
@ -251,6 +270,10 @@ void ExecuteRecompiler()
|
|||
DispatchInterrupt();
|
||||
}
|
||||
|
||||
TimingEvents::UpdateCPUDowncount();
|
||||
|
||||
while (g_state.pending_ticks < g_state.downcount)
|
||||
{
|
||||
const u32 pc = g_state.regs.pc;
|
||||
g_state.current_instruction_pc = pc;
|
||||
const u32 fast_map_index = GetFastMapIndex(pc);
|
||||
|
@ -259,6 +282,9 @@ void ExecuteRecompiler()
|
|||
|
||||
TimingEvents::RunEvents();
|
||||
}
|
||||
#else
|
||||
s_asm_dispatcher();
|
||||
#endif
|
||||
|
||||
// in case we switch to interpreter...
|
||||
g_state.regs.npc = g_state.regs.pc;
|
||||
|
@ -266,14 +292,33 @@ void ExecuteRecompiler()
|
|||
|
||||
#endif
|
||||
|
||||
void SetUseRecompiler(bool enable)
|
||||
void Reinitialize()
|
||||
{
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (s_use_recompiler == enable)
|
||||
return;
|
||||
|
||||
s_use_recompiler = enable;
|
||||
Flush();
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
ShutdownFastmem();
|
||||
s_code_buffer.Destroy();
|
||||
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
RECOMPILER_GUARD_SIZE))
|
||||
#else
|
||||
if (!s_code_buffer.Allocate(RECOMPILER_CODE_CACHE_SIZE, RECOMPILER_FAR_CODE_CACHE_SIZE))
|
||||
#endif
|
||||
{
|
||||
Panic("Failed to initialize code space");
|
||||
}
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
Panic("Failed to initialize fastmem");
|
||||
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -285,10 +330,13 @@ void Flush()
|
|||
|
||||
for (const auto& it : s_blocks)
|
||||
delete it.second;
|
||||
|
||||
s_blocks.clear();
|
||||
#ifdef WITH_RECOMPILER
|
||||
s_host_code_map.clear();
|
||||
s_code_buffer.Reset();
|
||||
ResetFastMap();
|
||||
CompileDispatcher();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -344,6 +392,8 @@ CodeBlock* LookupBlock(CodeBlockKey key)
|
|||
}
|
||||
|
||||
iter = s_blocks.emplace(key.bits, block).first;
|
||||
AddBlockToHostCodeMap(block);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
|
@ -370,6 +420,8 @@ bool RevalidateBlock(CodeBlock* block)
|
|||
return true;
|
||||
|
||||
recompile:
|
||||
RemoveBlockFromHostCodeMap(block);
|
||||
|
||||
block->instructions.clear();
|
||||
if (!CompileBlock(block))
|
||||
{
|
||||
|
@ -379,6 +431,7 @@ recompile:
|
|||
}
|
||||
|
||||
// re-add to page map again
|
||||
AddBlockToHostCodeMap(block);
|
||||
if (block->IsInRAM())
|
||||
AddBlockToPageMap(block);
|
||||
|
||||
|
@ -425,6 +478,9 @@ bool CompileBlock(CodeBlock* block)
|
|||
block->uncached_fetch_ticks += GetInstructionReadTicks(pc);
|
||||
}
|
||||
|
||||
block->contains_loadstore_instructions |= cbi.is_load_instruction;
|
||||
block->contains_loadstore_instructions |= cbi.is_store_instruction;
|
||||
|
||||
// instruction is decoded now
|
||||
block->instructions.push_back(cbi);
|
||||
pc += sizeof(cbi.instruction.bits);
|
||||
|
@ -467,7 +523,7 @@ bool CompileBlock(CodeBlock* block)
|
|||
}
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
if (s_use_recompiler)
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
// Ensure we're not going to run out of space while compiling this block.
|
||||
if (s_code_buffer.GetFreeCodeSpace() <
|
||||
|
@ -538,6 +594,9 @@ void FlushBlock(CodeBlock* block)
|
|||
RemoveBlockFromPageMap(block);
|
||||
|
||||
UnlinkBlock(block);
|
||||
#ifdef WITH_RECOMPILER
|
||||
RemoveBlockFromHostCodeMap(block);
|
||||
#endif
|
||||
|
||||
s_blocks.erase(iter);
|
||||
delete block;
|
||||
|
@ -599,4 +658,107 @@ void UnlinkBlock(CodeBlock* block)
|
|||
block->link_successors.clear();
|
||||
}
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
|
||||
/// Registers a block in the host-code lookup map, keyed by its host code pointer.
/// Does nothing unless the recompiler is in use. Asserts that the pointer was not already registered.
void AddBlockToHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const auto insert_result = s_host_code_map.emplace(block->host_code, block);
    Assert(insert_result.second);
  }
}
|
||||
|
||||
/// Removes a block from the host-code lookup map using its host code pointer as the key.
/// Does nothing unless the recompiler is in use. Asserts that the block was previously registered.
void RemoveBlockFromHostCodeMap(CodeBlock* block)
{
  if (g_settings.IsUsingRecompiler())
  {
    const auto entry = s_host_code_map.find(block->host_code);
    Assert(entry != s_host_code_map.end());
    s_host_code_map.erase(entry);
  }
}
|
||||
|
||||
bool InitializeFastmem()
|
||||
{
|
||||
if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to install page fault handler");
|
||||
return false;
|
||||
}
|
||||
|
||||
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShutdownFastmem()
|
||||
{
|
||||
Common::PageFaultHandler::RemoveHandler(&s_host_code_map);
|
||||
Bus::UpdateFastmemViews(false, false);
|
||||
}
|
||||
|
||||
/// Page fault callback used while fastmem is active.
///
/// Faults outside the fastmem region are passed on to the next handler. A write fault on a RAM page flagged as
/// code (while the cache is not isolated) invalidates the blocks on that page and resumes execution. Any other
/// fault is treated as a load/store that needs backpatching: the faulting host PC is matched against the
/// backpatch list of the block whose host code starts at or before it.
///
/// @param exception_pc  Host program counter of the faulting instruction.
/// @param fault_address Host address whose access faulted.
/// @param is_write      True when the faulting access was a write.
/// @return ContinueExecution when the fault was handled, otherwise ExecuteNextHandler.
Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write)
{
  using HandlerResult = Common::PageFaultHandler::HandlerResult;

  // Not ours unless the faulting address lies inside the fastmem region.
  u8* const fault_ptr = static_cast<u8*>(fault_address);
  if (fault_ptr < g_state.fastmem_base || (fault_ptr - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE)
    return HandlerResult::ExecuteNextHandler;

  // Offset into the fastmem region.
  const PhysicalMemoryAddress fastmem_address =
    static_cast<PhysicalMemoryAddress>(static_cast<ptrdiff_t>(fault_ptr - g_state.fastmem_base));

  Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
                is_write ? "(write)" : "(read)", fastmem_address);

  // A write fault on RAM most likely hit a page flagged as code: drop the blocks on that page and retry.
  const bool is_ram_write = is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(fastmem_address);
  if (is_ram_write)
  {
    const u32 page_index = Bus::GetRAMCodePageIndex(fastmem_address);
    if (Bus::IsRAMCodePage(page_index))
    {
      InvalidateBlocksWithPageIndex(page_index);
      return HandlerResult::ContinueExecution;
    }
  }

  // upper_bound() yields the first block starting after the faulting pc. If that is the very first entry,
  // no block starts at or before the pc.
  HostCodeMap::iterator block_iter =
    s_host_code_map.upper_bound(reinterpret_cast<CodeBlock::HostCodePointer>(exception_pc));
  if (block_iter == s_host_code_map.begin())
    return HandlerResult::ExecuteNextHandler;

  // Step back one entry to the block that (hopefully) contains the pc.
  --block_iter;
  CodeBlock* const block = block_iter->second;

  // Look for the backpatch record of the faulting instruction.
  auto& backpatch_list = block->loadstore_backpatch_info;
  for (auto entry = backpatch_list.begin(); entry != backpatch_list.end(); ++entry)
  {
    if (entry->host_pc != exception_pc)
      continue;

    if (!Recompiler::CodeGenerator::BackpatchLoadStore(*entry))
    {
      Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC());
      return HandlerResult::ExecuteNextHandler;
    }

    // The instruction has been patched and will not fault here again, so its record is no longer needed.
    backpatch_list.erase(entry);
    return HandlerResult::ContinueExecution;
  }

  // The pc was not in this block's backpatch list.
  Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC());
  return HandlerResult::ExecuteNextHandler;
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace CPU::CodeCache
|
||||
|
|
|
@ -1,14 +1,28 @@
|
|||
#pragma once
|
||||
#include "bus.h"
|
||||
#include "common/bitfield.h"
|
||||
#include "common/jit_code_buffer.h"
|
||||
#include "common/page_fault_handler.h"
|
||||
#include "cpu_types.h"
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
#include "cpu_recompiler_types.h"
|
||||
#endif
|
||||
|
||||
namespace CPU {
|
||||
|
||||
enum : u32
|
||||
{
|
||||
FAST_MAP_RAM_SLOT_COUNT = Bus::RAM_SIZE / 4,
|
||||
FAST_MAP_BIOS_SLOT_COUNT = Bus::BIOS_SIZE / 4,
|
||||
FAST_MAP_TOTAL_SLOT_COUNT = FAST_MAP_RAM_SLOT_COUNT + FAST_MAP_BIOS_SLOT_COUNT,
|
||||
};
|
||||
|
||||
union CodeBlockKey
|
||||
{
|
||||
u32 bits;
|
||||
|
@ -63,6 +77,12 @@ struct CodeBlock
|
|||
|
||||
TickCount uncached_fetch_ticks = 0;
|
||||
u32 icache_line_count = 0;
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
std::vector<Recompiler::LoadStoreBackpatchInfo> loadstore_backpatch_info;
|
||||
#endif
|
||||
|
||||
bool contains_loadstore_instructions = false;
|
||||
bool invalidated = false;
|
||||
|
||||
const u32 GetPC() const { return key.GetPC(); }
|
||||
|
@ -81,11 +101,12 @@ struct CodeBlock
|
|||
|
||||
namespace CodeCache {
|
||||
|
||||
void Initialize(bool use_recompiler);
|
||||
void Initialize();
|
||||
void Shutdown();
|
||||
void Execute();
|
||||
|
||||
#ifdef WITH_RECOMPILER
|
||||
CodeBlock::HostCodePointer* GetFastMapPointer();
|
||||
void ExecuteRecompiler();
|
||||
#endif
|
||||
|
||||
|
@ -93,7 +114,7 @@ void ExecuteRecompiler();
|
|||
void Flush();
|
||||
|
||||
/// Changes whether the recompiler is enabled.
|
||||
void SetUseRecompiler(bool enable);
|
||||
void Reinitialize();
|
||||
|
||||
/// Invalidates all blocks which are in the range of the specified code page.
|
||||
void InvalidateBlocksWithPageIndex(u32 page_index);
|
||||
|
@ -102,6 +123,18 @@ template<PGXPMode pgxp_mode>
|
|||
void InterpretCachedBlock(const CodeBlock& block);
|
||||
void InterpretUncachedBlock();
|
||||
|
||||
/// Invalidates any code pages which overlap the specified range.
|
||||
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count)
|
||||
{
|
||||
const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
const u32 end_page = (address + word_count * sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE;
|
||||
for (u32 page = start_page; page <= end_page; page++)
|
||||
{
|
||||
if (Bus::m_ram_code_bits[page])
|
||||
CPU::CodeCache::InvalidateBlocksWithPageIndex(page);
|
||||
}
|
||||
}
|
||||
|
||||
}; // namespace CodeCache
|
||||
|
||||
} // namespace CPU
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "cpu_core.h"
|
||||
#include "bus.h"
|
||||
#include "common/align.h"
|
||||
#include "common/file_system.h"
|
||||
#include "common/log.h"
|
||||
|
@ -123,7 +124,6 @@ bool DoState(StateWrapper& sw)
|
|||
sw.Do(&g_state.next_load_delay_reg);
|
||||
sw.Do(&g_state.next_load_delay_value);
|
||||
sw.Do(&g_state.cache_control.bits);
|
||||
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
|
||||
|
||||
if (!GTE::DoState(sw))
|
||||
return false;
|
||||
|
@ -231,7 +231,16 @@ void RaiseException(u32 CAUSE_bits, u32 EPC)
|
|||
/// Raises the given external interrupt line by setting its bit in CAUSE.Ip.
///
/// In interpreter mode the interrupt delay flag is set and nothing else happens here. In every other execution
/// mode the delay flag is cleared and CheckForPendingInterrupt() is called immediately.
///
/// @param bit Index of the interrupt bit within CAUSE.Ip.
void SetExternalInterrupt(u8 bit)
{
  g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);

  const bool using_interpreter = (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter);
  if (using_interpreter)
  {
    g_state.interrupt_delay = 1;
    return;
  }

  g_state.interrupt_delay = 0;
  CheckForPendingInterrupt();
}
|
||||
|
||||
void ClearExternalInterrupt(u8 bit)
|
||||
|
@ -397,6 +406,7 @@ ALWAYS_INLINE_RELEASE static void WriteCop0Reg(Cop0Reg reg, u32 value)
|
|||
g_state.cop0_regs.sr.bits =
|
||||
(g_state.cop0_regs.sr.bits & ~Cop0Registers::SR::WRITE_MASK) | (value & Cop0Registers::SR::WRITE_MASK);
|
||||
Log_DebugPrintf("COP0 SR <- %08X (now %08X)", value, g_state.cop0_regs.sr.bits);
|
||||
CheckForPendingInterrupt();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -405,6 +415,7 @@ ALWAYS_INLINE_RELEASE static void WriteCop0Reg(Cop0Reg reg, u32 value)
|
|||
g_state.cop0_regs.cause.bits =
|
||||
(g_state.cop0_regs.cause.bits & ~Cop0Registers::CAUSE::WRITE_MASK) | (value & Cop0Registers::CAUSE::WRITE_MASK);
|
||||
Log_DebugPrintf("COP0 CAUSE <- %08X (now %08X)", value, g_state.cop0_regs.cause.bits);
|
||||
CheckForPendingInterrupt();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1218,6 +1229,7 @@ restart_instruction:
|
|||
// restore mode
|
||||
g_state.cop0_regs.sr.mode_bits =
|
||||
(g_state.cop0_regs.sr.mode_bits & UINT32_C(0b110000)) | (g_state.cop0_regs.sr.mode_bits >> 2);
|
||||
CheckForPendingInterrupt();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1367,6 +1379,21 @@ restart_instruction:
|
|||
}
|
||||
}
|
||||
|
||||
void DispatchInterrupt()
|
||||
{
|
||||
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
|
||||
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
|
||||
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
|
||||
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
|
||||
GTE::ExecuteInstruction(g_state.next_instruction.bits);
|
||||
|
||||
// Interrupt raising occurs before the start of the instruction.
|
||||
RaiseException(
|
||||
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
|
||||
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
|
||||
g_state.regs.pc);
|
||||
}
|
||||
|
||||
template<PGXPMode pgxp_mode>
|
||||
static void ExecuteImpl()
|
||||
{
|
||||
|
@ -1376,9 +1403,10 @@ static void ExecuteImpl()
|
|||
|
||||
while (g_state.pending_ticks < g_state.downcount)
|
||||
{
|
||||
if (HasPendingInterrupt())
|
||||
if (HasPendingInterrupt() && !g_state.interrupt_delay)
|
||||
DispatchInterrupt();
|
||||
|
||||
g_state.interrupt_delay = false;
|
||||
g_state.pending_ticks++;
|
||||
|
||||
// now executing the instruction we previously fetched
|
||||
|
@ -1536,6 +1564,11 @@ bool InterpretInstructionPGXP()
|
|||
return g_state.exception_raised;
|
||||
}
|
||||
|
||||
void UpdateFastmemMapping()
|
||||
{
|
||||
Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc);
|
||||
}
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
} // namespace CPU
|
|
@ -79,8 +79,8 @@ struct State
|
|||
// GTE registers are stored here so we can access them on ARM with a single instruction
|
||||
GTE::Regs gte_regs = {};
|
||||
|
||||
// data cache (used as scratchpad)
|
||||
std::array<u8, DCACHE_SIZE> dcache = {};
|
||||
u8* fastmem_base = nullptr;
|
||||
|
||||
std::array<u32, ICACHE_LINES> icache_tags = {};
|
||||
std::array<u8, ICACHE_SIZE> icache_data = {};
|
||||
};
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include "cpu_core.h"
|
||||
#include "bus.h"
|
||||
|
||||
namespace CPU {
|
||||
|
||||
|
@ -7,33 +8,20 @@ namespace CPU {
|
|||
void RaiseException(Exception excode);
|
||||
void RaiseException(u32 CAUSE_bits, u32 EPC);
|
||||
|
||||
ALWAYS_INLINE static bool HasPendingInterrupt()
|
||||
ALWAYS_INLINE bool HasPendingInterrupt()
|
||||
{
|
||||
// const bool do_interrupt = g_state.m_cop0_regs.sr.IEc && ((g_state.m_cop0_regs.cause.Ip & g_state.m_cop0_regs.sr.Im)
|
||||
// != 0);
|
||||
const bool do_interrupt = g_state.cop0_regs.sr.IEc &&
|
||||
return g_state.cop0_regs.sr.IEc &&
|
||||
(((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
|
||||
|
||||
const bool interrupt_delay = g_state.interrupt_delay;
|
||||
g_state.interrupt_delay = false;
|
||||
|
||||
return do_interrupt && !interrupt_delay;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void DispatchInterrupt()
|
||||
ALWAYS_INLINE void CheckForPendingInterrupt()
|
||||
{
|
||||
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
|
||||
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
|
||||
if (g_state.next_instruction.IsCop2Instruction())
|
||||
return;
|
||||
|
||||
// Interrupt raising occurs before the start of the instruction.
|
||||
RaiseException(
|
||||
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
|
||||
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
|
||||
g_state.regs.pc);
|
||||
if (HasPendingInterrupt())
|
||||
g_state.downcount = 0;
|
||||
}
|
||||
|
||||
void DispatchInterrupt();
|
||||
|
||||
// icache stuff
|
||||
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
|
||||
{
|
||||
|
@ -72,5 +60,7 @@ bool ReadMemoryWord(VirtualMemoryAddress addr, u32* value);
|
|||
bool WriteMemoryByte(VirtualMemoryAddress addr, u8 value);
|
||||
bool WriteMemoryHalfWord(VirtualMemoryAddress addr, u16 value);
|
||||
bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
|
||||
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
|
||||
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
|
||||
|
||||
} // namespace CPU
|
|
@ -19,8 +19,7 @@ u32 CodeGenerator::CalculateRegisterOffset(Reg reg)
|
|||
return u32(offsetof(State, regs.r[0]) + (static_cast<u32>(reg) * sizeof(u32)));
|
||||
}
|
||||
|
||||
bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code,
|
||||
u32* out_host_code_size)
|
||||
bool CodeGenerator::CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
|
||||
{
|
||||
// TODO: Align code buffer.
|
||||
|
||||
|
@ -40,8 +39,10 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
|||
Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
|
||||
#endif
|
||||
|
||||
m_current_instruction = cbi;
|
||||
if (!CompileInstruction(*cbi))
|
||||
{
|
||||
m_current_instruction = nullptr;
|
||||
m_block_end = nullptr;
|
||||
m_block_start = nullptr;
|
||||
m_block = nullptr;
|
||||
|
@ -60,6 +61,7 @@ bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePoin
|
|||
|
||||
DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);
|
||||
|
||||
m_current_instruction = nullptr;
|
||||
m_block_end = nullptr;
|
||||
m_block_start = nullptr;
|
||||
m_block = nullptr;
|
||||
|
@ -845,8 +847,8 @@ void CodeGenerator::BlockPrologue()
|
|||
|
||||
// we don't know the state of the last block, so assume load delays might be in progress
|
||||
// TODO: Pull load delay into register cache
|
||||
m_current_instruction_in_branch_delay_slot_dirty = true;
|
||||
m_branch_was_taken_dirty = true;
|
||||
m_current_instruction_in_branch_delay_slot_dirty = g_settings.cpu_recompiler_memory_exceptions;
|
||||
m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
|
||||
m_current_instruction_was_branch_taken_dirty = false;
|
||||
m_load_delay_dirty = true;
|
||||
|
||||
|
@ -909,7 +911,7 @@ void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCou
|
|||
return;
|
||||
}
|
||||
|
||||
if (cbi.is_branch_delay_slot)
|
||||
if (cbi.is_branch_delay_slot && g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
// m_current_instruction_in_branch_delay_slot = true
|
||||
EmitStoreCPUStructField(offsetof(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
|
||||
|
@ -1895,7 +1897,22 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
|
|||
value = AndValues(value, Value::FromConstantU32(write_mask));
|
||||
}
|
||||
|
||||
// changing SR[Isc] needs to update fastmem views
|
||||
if (reg == Cop0Reg::SR && g_settings.cpu_fastmem)
|
||||
{
|
||||
LabelType skip_fastmem_update;
|
||||
Value old_value = m_register_cache.AllocateScratch(RegSize_32);
|
||||
EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
|
||||
EmitStoreCPUStructField(offset, value);
|
||||
EmitXor(old_value.host_reg, old_value.host_reg, value);
|
||||
EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_fastmem_update);
|
||||
EmitFunctionCall(nullptr, &Thunks::UpdateFastmemMapping, m_register_cache.GetCPUPtr());
|
||||
EmitBindLabel(&skip_fastmem_update);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitStoreCPUStructField(offset, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1913,21 +1930,8 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
|
|||
EmitBranchIfBitClear(sr_value.host_reg, sr_value.size, 0, &no_interrupt);
|
||||
EmitAnd(sr_value.host_reg, sr_value.host_reg, cause_value);
|
||||
EmitTest(sr_value.host_reg, Value::FromConstantU32(0xFF00));
|
||||
sr_value.ReleaseAndClear();
|
||||
cause_value.ReleaseAndClear();
|
||||
EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
|
||||
|
||||
EmitBranch(GetCurrentFarCodePointer());
|
||||
SwitchToFarCode();
|
||||
|
||||
// we want to flush pc here
|
||||
m_register_cache.PushState();
|
||||
m_register_cache.FlushAllGuestRegisters(false, true);
|
||||
WriteNewPC(CalculatePC(), false);
|
||||
EmitExceptionExit();
|
||||
m_register_cache.PopState();
|
||||
|
||||
SwitchToNearCode();
|
||||
EmitStoreCPUStructField(offsetof(State, downcount), Value::FromConstantU32(0));
|
||||
EmitBindLabel(&no_interrupt);
|
||||
}
|
||||
|
||||
|
@ -1962,6 +1966,16 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi)
|
|||
|
||||
EmitStoreCPUStructField(offsetof(State, cop0_regs.sr.bits), sr);
|
||||
|
||||
Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
|
||||
EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, offsetof(State, cop0_regs.cause.bits));
|
||||
|
||||
LabelType no_interrupt;
|
||||
EmitAnd(sr.host_reg, sr.host_reg, cause_value);
|
||||
EmitTest(sr.host_reg, Value::FromConstantU32(0xFF00));
|
||||
EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
|
||||
EmitStoreCPUStructField(offsetof(State, downcount), Value::FromConstantU32(0));
|
||||
EmitBindLabel(&no_interrupt);
|
||||
|
||||
InstructionEpilogue(cbi);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,11 @@ public:
|
|||
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
|
||||
static void AlignCodeBuffer(JitCodeBuffer* code_buffer);
|
||||
|
||||
bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
static bool BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi);
|
||||
|
||||
bool CompileBlock(CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);
|
||||
|
||||
CodeBlock::HostCodePointer CompileDispatcher();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Code Generation
|
||||
|
@ -67,10 +71,15 @@ public:
|
|||
void EmitAddCPUStructField(u32 offset, const Value& value);
|
||||
void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr);
|
||||
void EmitStoreGlobal(void* ptr, const Value& value);
|
||||
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
|
||||
|
||||
// Automatically generates an exception handler.
|
||||
Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size);
|
||||
void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result);
|
||||
void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code);
|
||||
void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
|
||||
void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value);
|
||||
void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code);
|
||||
|
||||
// Unconditional branch to pointer. May allocate a scratch register.
|
||||
void EmitBranch(const void* address, bool allow_scratch = true);
|
||||
|
@ -201,9 +210,10 @@ private:
|
|||
bool Compile_cop2(const CodeBlockInstruction& cbi);
|
||||
|
||||
JitCodeBuffer* m_code_buffer;
|
||||
const CodeBlock* m_block = nullptr;
|
||||
CodeBlock* m_block = nullptr;
|
||||
const CodeBlockInstruction* m_block_start = nullptr;
|
||||
const CodeBlockInstruction* m_block_end = nullptr;
|
||||
const CodeBlockInstruction* m_current_instruction = nullptr;
|
||||
RegisterCache m_register_cache;
|
||||
CodeEmitter m_near_emitter;
|
||||
CodeEmitter m_far_emitter;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "cpu_recompiler_code_generator.h"
|
||||
#include "cpu_recompiler_thunks.h"
|
||||
#include "settings.h"
|
||||
#include "timing_event.h"
|
||||
Log_SetChannel(CPU::Recompiler);
|
||||
|
||||
namespace a64 = vixl::aarch64;
|
||||
|
@ -13,6 +14,7 @@ namespace a64 = vixl::aarch64;
|
|||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 19;
|
||||
constexpr HostReg RMEMBASEPTR = 20;
|
||||
constexpr HostReg RRETURN = 0;
|
||||
constexpr HostReg RARG1 = 0;
|
||||
constexpr HostReg RARG2 = 1;
|
||||
|
@ -26,6 +28,16 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
|
|||
constexpr u64 FUNCTION_STACK_SIZE =
|
||||
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
|
||||
|
||||
// PC we return to after the end of the block
|
||||
static void* s_dispatcher_return_address;
|
||||
|
||||
/// Returns the signed distance from `current` to `target`, expressed in units of 4 bytes.
/// Both addresses are asserted to be 4-byte aligned before the distance is computed.
static s64 GetPCDisplacement(const void* current, const void* target)
{
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));

  const ptrdiff_t from_address = reinterpret_cast<ptrdiff_t>(current);
  const ptrdiff_t to_address = reinterpret_cast<ptrdiff_t>(target);
  const ptrdiff_t byte_delta = to_address - from_address;

  // Convert the byte distance into a count of 4-byte units.
  return static_cast<s64>(byte_delta >> 2);
}
|
||||
|
||||
static const a64::WRegister GetHostReg8(HostReg reg)
|
||||
{
|
||||
return a64::WRegister(reg);
|
||||
|
@ -75,6 +87,11 @@ static const a64::XRegister GetCPUPtrReg()
|
|||
return GetHostReg64(RCPUPTR);
|
||||
}
|
||||
|
||||
/// Returns the 64-bit view of the host register reserved for the fastmem base pointer (RMEMBASEPTR).
static const a64::XRegister GetFastmemBasePtrReg()
{
  const a64::XRegister fastmem_base_reg = GetHostReg64(RMEMBASEPTR);
  return fastmem_base_reg;
}
|
||||
|
||||
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
|
||||
: m_code_buffer(code_buffer), m_register_cache(*this),
|
||||
m_near_emitter(static_cast<vixl::byte*>(code_buffer->GetFreeCodePointer()), code_buffer->GetFreeCodeSpace(),
|
||||
|
@ -172,19 +189,31 @@ void CodeGenerator::EmitBeginBlock()
|
|||
// Save the link register, since we'll be calling functions.
|
||||
const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
|
||||
DebugAssert(link_reg_allocated);
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer. TODO: make this better.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the block epilogue: releases the registers reserved for the whole block, pops the
// callee-saved registers, restores the stack pointer and returns to the caller.
void CodeGenerator::EmitEndBlock()
|
||||
{
|
||||
// The fastmem base register is only released for blocks flagged as containing load/store
// instructions.
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
|
||||
// Undo the FUNCTION_STACK_SIZE stack adjustment before leaving the block.
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
// Disabled alternative: branch straight to s_dispatcher_return_address instead of returning.
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
|
||||
m_emit->Ret();
|
||||
}
|
||||
|
||||
|
@ -200,6 +229,7 @@ void CodeGenerator::EmitExceptionExit()
|
|||
m_register_cache.PopCalleeSavedRegisters(false);
|
||||
|
||||
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
|
||||
m_emit->Ret();
|
||||
}
|
||||
|
||||
|
@ -958,13 +988,6 @@ void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
|
|||
m_register_cache.PopCallerSavedRegisters();
|
||||
}
|
||||
|
||||
static s64 GetBranchDisplacement(const void* current, const void* target)
|
||||
{
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
|
||||
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
|
||||
return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
|
||||
{
|
||||
if (return_value)
|
||||
|
@ -974,7 +997,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
|
|||
const u32 adjust_size = PrepareStackForCall();
|
||||
|
||||
// actually call the function
|
||||
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1009,7 +1032,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
EmitCopyValue(RARG1, arg1);
|
||||
|
||||
// actually call the function
|
||||
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1045,7 +1068,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
EmitCopyValue(RARG2, arg2);
|
||||
|
||||
// actually call the function
|
||||
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1083,7 +1106,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
EmitCopyValue(RARG3, arg3);
|
||||
|
||||
// actually call the function
|
||||
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1122,7 +1145,7 @@ void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, co
|
|||
EmitCopyValue(RARG4, arg4);
|
||||
|
||||
// actually call the function
|
||||
const s64 displacement = GetBranchDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const s64 displacement = GetPCDisplacement(GetCurrentCodePointer(), ptr);
|
||||
const bool use_blr = !vixl::IsInt26(displacement);
|
||||
if (use_blr)
|
||||
{
|
||||
|
@ -1283,14 +1306,124 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
|||
|
||||
/// Emits code that loads `size` bits of guest memory at `address` and returns the value holding the result.
/// A constant address that resolves to a direct host pointer is read straight from that pointer;
/// everything else goes through the fastmem or slowmem path, depending on the current settings.
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
  if (address.IsConstant())
  {
    // Anything that is not a byte or halfword access is treated as a word access.
    MemoryAccessSize access_size;
    if (size == RegSize_8)
      access_size = MemoryAccessSize::Byte;
    else if (size == RegSize_16)
      access_size = MemoryAccessSize::HalfWord;
    else
      access_size = MemoryAccessSize::Word;

    TickCount direct_read_ticks;
    void* direct_ptr =
      GetDirectReadMemoryPointer(static_cast<u32>(address.constant_value), access_size, &direct_read_ticks);
    if (direct_ptr)
    {
      Value direct_result = m_register_cache.AllocateScratch(size);
      EmitLoadGlobal(direct_result.GetHostRegister(), size, direct_ptr);
      m_delayed_cycles_add += direct_read_ticks;
      return direct_result;
    }
  }

  AddPendingCycles(true);

  Value result = m_register_cache.AllocateScratch(RegSize_64);
  if (!g_settings.IsUsingFastmem())
  {
    m_register_cache.FlushCallerSavedGuestRegisters(true, true);
    EmitLoadGuestMemorySlowmem(cbi, address, size, result, false);
  }
  else
  {
    EmitLoadGuestMemoryFastmem(cbi, address, size, result);
  }

  // Downcast to ignore upper 56/48/32 bits. This should be a noop.
  const bool is_loadable_size = (size == RegSize_8 || size == RegSize_16 || size == RegSize_32);
  if (is_loadable_size)
  {
    ConvertValueSizeInPlace(&result, size, false);
  }
  else
  {
    UnreachableCode();
  }

  return result;
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
a64::MemOperand actual_address;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg));
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
|
||||
}
|
||||
|
||||
// TODO: movsx/zx inline here
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Ldr(GetHostReg32(result.host_reg), actual_address);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
// We need to use the full 64 bits here since we test the sign bit result.
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
// NOTE: This can leave junk in the upper bits
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1319,6 +1452,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->Bind(&load_okay);
|
||||
|
||||
// load exception path
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (-result << 2) | BD | cop_n
|
||||
|
@ -1330,37 +1464,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1379,19 +1490,73 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
||||
{
|
||||
if (address.IsConstant())
|
||||
{
|
||||
void* ptr = GetDirectWriteMemoryPointer(
|
||||
static_cast<u32>(address.constant_value),
|
||||
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
|
||||
if (ptr)
|
||||
{
|
||||
EmitStoreGlobal(ptr, value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
// we need the value in a host register to store it
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
EmitStoreGuestMemoryFastmem(cbi, address, value_in_hr);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
a64::MemOperand actual_address;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH));
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address));
|
||||
}
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
m_emit->Strb(GetHostReg8(value), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
m_emit->Strh(GetHostReg16(value), actual_address);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Str(GetHostReg32(value), actual_address);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1399,19 +1564,33 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
|
||||
|
||||
// return to the block code
|
||||
EmitBranch(GetCurrentNearCodePointer(), false);
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value, bool in_far_code)
|
||||
{
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
Assert(!in_far_code);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1439,6 +1618,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->Bind(&store_okay);
|
||||
|
||||
// store exception path
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (result << 2) | BD | cop_n
|
||||
|
@ -1448,6 +1628,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
static_cast<Exception>(0), cbi.is_branch_delay_slot, false, cbi.instruction.cop.cop_n)));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
if (!in_far_code)
|
||||
EmitExceptionExit();
|
||||
SwitchToNearCode();
|
||||
|
||||
|
@ -1455,8 +1636,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1478,14 +1657,76 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc);
|
||||
|
||||
// check jump distance
|
||||
const s64 jump_distance =
|
||||
static_cast<s64>(reinterpret_cast<intptr_t>(lbi.host_slowmem_pc) - reinterpret_cast<intptr_t>(lbi.host_pc));
|
||||
Assert(Common::IsAligned(jump_distance, 4));
|
||||
Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(lbi.host_pc), lbi.host_code_size,
|
||||
a64::PositionDependentCode);
|
||||
emit.b(jump_distance >> 2);
|
||||
|
||||
const s32 nops = (static_cast<s32>(lbi.host_code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
emit.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
Panic("Not implemented");
|
||||
EmitLoadGlobalAddress(RSCRATCH, ptr);
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
|
||||
{
|
||||
Panic("Not implemented");
|
||||
Value value_in_hr = GetValueInHostRegister(value);
|
||||
|
||||
EmitLoadGlobalAddress(RSCRATCH, ptr);
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitFlushInterpreterLoadDelay()
|
||||
|
@ -1814,4 +2055,136 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
|
|||
m_emit->Bind(label);
|
||||
}
|
||||
|
||||
/// Emits code that places the address `ptr` in `host_reg`.
/// Uses adrp + orr when the page displacement fits in 21 bits and the offset within the 4 KiB page
/// is encodable as a logical immediate; otherwise falls back to moving the full 64-bit address.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const uintptr_t page_offset_mask = static_cast<uintptr_t>(0xFFF);
  const uintptr_t code_address = reinterpret_cast<uintptr_t>(GetCurrentCodePointer());
  const uintptr_t target_address = reinterpret_cast<uintptr_t>(ptr);

  const void* code_page = reinterpret_cast<const void*>(code_address & ~page_offset_mask);
  const void* target_page = reinterpret_cast<const void*>(target_address & ~page_offset_mask);

  // GetPCDisplacement() already divides by 4; shifting by another 10 bits yields a count of 4 KiB pages.
  const s64 page_displacement = GetPCDisplacement(code_page, target_page) >> 10;
  const u32 page_offset = static_cast<u32>(target_address & 0xFFFu);

  const bool can_use_adrp = vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64);
  if (!can_use_adrp)
  {
    m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr));
    return;
  }

  m_emit->adrp(GetHostReg64(host_reg), page_displacement);
  m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset);
}
|
||||
|
||||
// Emits the dispatcher routine and returns a pointer to the finalized code.
// The emitted code: exits when State::frame_done is set, calls DispatchInterrupt() when an enabled
// interrupt is pending, copies the head timing event's downcount into State::downcount, looks up the
// host code for the current guest PC in the fast map and calls it, and calls
// TimingEvents::RunEvents() once pending_ticks reaches the head event's downcount.
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
|
||||
{
|
||||
m_emit->Sub(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
// NOTE(review): caller-saved registers are reserved here, while the exit path below pops
// callee-saved registers - confirm this pairing is intended.
m_register_cache.ReserveCallerSavedRegisters();
|
||||
|
||||
// RCPUPTR <- &g_state; every State field access below is relative to this register.
EmitLoadGlobalAddress(RCPUPTR, &g_state);
|
||||
|
||||
a64::Label frame_done_loop;
|
||||
a64::Label exit_dispatcher;
|
||||
m_emit->Bind(&frame_done_loop);
|
||||
|
||||
// if frame_done goto exit_dispatcher
|
||||
m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
|
||||
m_emit->tbnz(a64::w8, 0, &exit_dispatcher);
|
||||
|
||||
// w8 <- sr
|
||||
a64::Label no_interrupt;
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
|
||||
|
||||
// if Iec == 0 (bit 0 of sr) then goto no_interrupt
|
||||
m_emit->tbz(a64::w8, 0, &no_interrupt);
|
||||
|
||||
// w9 <- cause
|
||||
// w8 <- w8 (sr) & cause
|
||||
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
|
||||
m_emit->and_(a64::w8, a64::w8, a64::w9);
|
||||
|
||||
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
|
||||
m_emit->tst(a64::w8, 0xFF00);
|
||||
m_emit->b(&no_interrupt, a64::eq);
|
||||
|
||||
// we have an interrupt
|
||||
EmitFunctionCall(nullptr, &DispatchInterrupt);
|
||||
|
||||
// no interrupt or we just serviced it
|
||||
m_emit->Bind(&no_interrupt);
|
||||
|
||||
// TimingEvents::UpdateCPUDowncount:
|
||||
// w8 <- head event->downcount
|
||||
// downcount <- w8
|
||||
EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
|
||||
m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
|
||||
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
|
||||
|
||||
// main dispatch loop
// NOTE(review): nothing emitted in this function branches back to main_loop; its address is only
// saved in s_dispatcher_return_address. After the blr below returns, execution continues at
// downcount_hit - confirm this is intended.
|
||||
a64::Label main_loop;
|
||||
m_emit->Bind(&main_loop);
|
||||
s_dispatcher_return_address = GetCurrentCodePointer();
|
||||
|
||||
// w8 <- pending_ticks
|
||||
// w9 <- downcount
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
|
||||
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
|
||||
|
||||
// if pending_ticks >= downcount goto downcount_hit (a block is only run while pending_ticks < downcount)
|
||||
a64::Label downcount_hit;
|
||||
m_emit->cmp(a64::w8, a64::w9);
|
||||
m_emit->b(&downcount_hit, a64::ge);
|
||||
|
||||
// time to lookup the block
|
||||
// w8 <- pc
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));
|
||||
|
||||
// w9 <- (pc & RAM_MASK) >> 2
|
||||
m_emit->and_(a64::w9, a64::w8, Bus::RAM_MASK);
|
||||
m_emit->lsr(a64::w9, a64::w9, 2);
|
||||
|
||||
// w10 <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
|
||||
m_emit->and_(a64::w10, a64::w8, Bus::BIOS_MASK);
|
||||
m_emit->lsr(a64::w10, a64::w10, 2);
|
||||
m_emit->add(a64::w10, a64::w10, FAST_MAP_RAM_SLOT_COUNT);
|
||||
|
||||
// current_instruction_pc <- pc (w8)
|
||||
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, current_instruction_pc)));
|
||||
|
||||
// w8 <- ((pc & PHYSICAL_MEMORY_ADDRESS_MASK) < BIOS_BASE) ? w9 (RAM index) : w10 (BIOS index)
|
||||
m_emit->and_(a64::w8, a64::w8, PHYSICAL_MEMORY_ADDRESS_MASK);
|
||||
m_emit->Mov(a64::w11, Bus::BIOS_BASE);
|
||||
m_emit->cmp(a64::w8, a64::w11);
|
||||
m_emit->csel(a64::w8, a64::w9, a64::w10, a64::lt);
|
||||
|
||||
// w8 contains our index: x9 <- fast map pointer, x8 <- [x9 + (x8 << 3)], then call x8
|
||||
EmitLoadGlobalAddress(9, CodeCache::GetFastMapPointer());
|
||||
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
|
||||
m_emit->blr(a64::x8);
|
||||
|
||||
// reached when pending_ticks >= downcount, or after the called block returns
|
||||
m_emit->Bind(&downcount_hit);
|
||||
|
||||
// if pending_ticks < head event downcount, go back to the frame_done check;
// otherwise run events first and then go back
|
||||
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
|
||||
EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr());
|
||||
m_emit->ldr(a64::x9, a64::MemOperand(a64::x9));
|
||||
m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount)));
|
||||
m_emit->cmp(a64::w8, a64::w9);
|
||||
m_emit->b(&frame_done_loop, a64::lt);
|
||||
EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
|
||||
m_emit->b(&frame_done_loop);
|
||||
|
||||
// all done: pop registers, restore the stack pointer and return to the caller
|
||||
m_emit->Bind(&exit_dispatcher);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
|
||||
m_emit->Ret();
|
||||
|
||||
// Finalize the emitted code and hand back its entry point.
CodeBlock::HostCodePointer ptr;
|
||||
u32 code_size;
|
||||
FinalizeBlock(&ptr, &code_size);
|
||||
Log_InfoPrintf("Dispatcher is %u bytes at %p", code_size, ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -1,14 +1,19 @@
|
|||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_recompiler_code_generator.h"
|
||||
#include "cpu_recompiler_thunks.h"
|
||||
#include "settings.h"
|
||||
#include "timing_event.h"
|
||||
Log_SetChannel(Recompiler::CodeGenerator);
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
#if defined(ABI_WIN64)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RCX;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RDX;
|
||||
|
@ -18,6 +23,7 @@ constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32;
|
|||
constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16;
|
||||
#elif defined(ABI_SYSV)
|
||||
constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
|
||||
constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
|
||||
constexpr HostReg RRETURN = Xbyak::Operand::RAX;
|
||||
constexpr HostReg RARG1 = Xbyak::Operand::RDI;
|
||||
constexpr HostReg RARG2 = Xbyak::Operand::RSI;
|
||||
|
@ -76,6 +82,11 @@ static const Xbyak::Reg64 GetCPUPtrReg()
|
|||
return GetHostReg64(RCPUPTR);
|
||||
}
|
||||
|
||||
/// Returns the 64-bit view of the host register reserved for the fastmem base pointer (RMEMBASEPTR).
static const Xbyak::Reg64 GetFastmemBasePtrReg()
{
  const Xbyak::Reg64 fastmem_base_reg = GetHostReg64(RMEMBASEPTR);
  return fastmem_base_reg;
}
|
||||
|
||||
CodeGenerator::CodeGenerator(JitCodeBuffer* code_buffer)
|
||||
: m_code_buffer(code_buffer), m_register_cache(*this),
|
||||
m_near_emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()),
|
||||
|
@ -137,7 +148,6 @@ void CodeGenerator::InitHostRegs()
|
|||
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
|
||||
Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
|
||||
Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
#elif defined(ABI_SYSV)
|
||||
m_register_cache.SetHostRegAllocationOrder(
|
||||
{Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
|
||||
|
@ -151,8 +161,9 @@ void CodeGenerator::InitHostRegs()
|
|||
m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
|
||||
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
|
||||
Xbyak::Operand::R15});
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
#endif
|
||||
|
||||
m_register_cache.SetCPUPtrHostReg(RCPUPTR);
|
||||
}
|
||||
|
||||
void CodeGenerator::SwitchToFarCode()
|
||||
|
@ -187,15 +198,28 @@ Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_
|
|||
|
||||
void CodeGenerator::EmitBeginBlock()
|
||||
{
|
||||
m_register_cache.AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
// Store the CPU struct pointer.
|
||||
const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
// m_emit->mov(GetCPUPtrReg(), reinterpret_cast<size_t>(&g_state));
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + offsetof(CPU::State, fastmem_base)]);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitEndBlock()
|
||||
{
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
if (m_block->contains_loadstore_instructions)
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
|
||||
m_emit->ret();
|
||||
|
@ -1738,14 +1762,158 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
|||
|
||||
/// Emits code that loads `size` bits of guest memory at `address` and returns the value holding the result.
/// A constant address that resolves to a direct host pointer is read straight from that pointer; otherwise the
/// load is emitted through the fastmem or slowmem path, depending on the current settings.
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, RegSize size)
{
  if (address.IsConstant())
  {
    // Anything that is not a byte or halfword access is treated as a word access.
    MemoryAccessSize access_size = MemoryAccessSize::Word;
    if (size == RegSize_8)
      access_size = MemoryAccessSize::Byte;
    else if (size == RegSize_16)
      access_size = MemoryAccessSize::HalfWord;

    TickCount direct_ticks;
    void* const direct_ptr =
      GetDirectReadMemoryPointer(static_cast<u32>(address.constant_value), access_size, &direct_ticks);
    if (direct_ptr)
    {
      // Direct pointer available: load from it and defer the cycle cost.
      Value direct_result = m_register_cache.AllocateScratch(size);
      EmitLoadGlobal(direct_result.GetHostRegister(), size, direct_ptr);
      m_delayed_cycles_add += direct_ticks;
      return direct_result;
    }
  }

  AddPendingCycles(true);

  // The scratch register is allocated at 64 bits and narrowed once the load has been emitted.
  Value loaded = m_register_cache.AllocateScratch(RegSize_64);
  if (!g_settings.IsUsingFastmem())
  {
    m_register_cache.FlushCallerSavedGuestRegisters(true, true);
    EmitLoadGuestMemorySlowmem(cbi, address, size, loaded, false);
  }
  else
  {
    EmitLoadGuestMemoryFastmem(cbi, address, size, loaded);
  }

  // Downcast to ignore upper 56/48/32 bits. This should be a noop.
  switch (size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
      ConvertValueSizeInPlace(&loaded, size, false);
      break;

    default:
      UnreachableCode();
      break;
  }

  return loaded;
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = result.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
// can't store displacements > 0x80000000 in-line
|
||||
const Value* actual_address = &address;
|
||||
if (address.IsConstant() && address.constant_value >= 0x80000000)
|
||||
{
|
||||
actual_address = &result;
|
||||
m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
// TODO: movsx/zx inline here
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg8(result.host_reg),
|
||||
m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg8(result.host_reg),
|
||||
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg16(result.host_reg),
|
||||
m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg16(result.host_reg),
|
||||
m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
m_emit->mov(GetHostReg32(result.host_reg),
|
||||
m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(GetHostReg32(result.host_reg),
|
||||
m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: BIOS reads...
|
||||
EmitAddCPUStructField(offsetof(CPU::State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS));
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size,
|
||||
Value& result, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
// We need to use the full 64 bits here since we test the sign bit result.
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_64);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
// NOTE: This can leave junk in the upper bits
|
||||
switch (size)
|
||||
{
|
||||
|
@ -1772,6 +1940,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_register_cache.PushState();
|
||||
|
||||
// load exception path
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (-result << 2) | BD | cop_n
|
||||
|
@ -1783,37 +1952,14 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
ConvertValueSizeInPlace(&result, RegSize_32, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1832,39 +1978,183 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
|
||||
switch (size)
|
||||
{
|
||||
case RegSize_8:
|
||||
ConvertValueSizeInPlace(&result, RegSize_8, false);
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
ConvertValueSizeInPlace(&result, RegSize_16, false);
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
break;
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const Value& value)
|
||||
{
|
||||
if (address.IsConstant())
|
||||
{
|
||||
void* ptr = GetDirectWriteMemoryPointer(
|
||||
static_cast<u32>(address.constant_value),
|
||||
(value.size == RegSize_8) ? MemoryAccessSize::Byte :
|
||||
((value.size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
|
||||
if (ptr)
|
||||
{
|
||||
EmitStoreGlobal(ptr, value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
AddPendingCycles(true);
|
||||
|
||||
if (g_settings.IsUsingFastmem())
|
||||
{
|
||||
EmitStoreGuestMemoryFastmem(cbi, address, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, false);
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value)
|
||||
{
|
||||
// fastmem
|
||||
LoadStoreBackpatchInfo bpi;
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
bpi.address_host_reg = HostReg_Invalid;
|
||||
bpi.value_host_reg = value.host_reg;
|
||||
bpi.guest_pc = m_current_instruction->pc;
|
||||
|
||||
// can't store displacements > 0x80000000 in-line
|
||||
const Value* actual_address = &address;
|
||||
Value temp_address;
|
||||
if (address.IsConstant() && address.constant_value >= 0x80000000)
|
||||
{
|
||||
temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
|
||||
actual_address = &temp_address;
|
||||
m_emit->mov(GetHostReg32(temp_address), address.constant_value);
|
||||
bpi.host_pc = GetCurrentNearCodePointer();
|
||||
}
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg8(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg8(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_16:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg16(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg16(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case RegSize_32:
|
||||
{
|
||||
if (actual_address->IsConstant())
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value],
|
||||
GetHostReg32(value.host_reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value.IsConstant())
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
value.constant_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)],
|
||||
GetHostReg32(value.host_reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// insert nops, we need at least 5 bytes for a relative jump
|
||||
const u32 fastmem_size =
|
||||
static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
|
||||
const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
|
||||
for (u32 i = 0; i < nops; i++)
|
||||
m_emit->nop();
|
||||
|
||||
bpi.host_code_size = static_cast<u32>(
|
||||
static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
|
||||
|
||||
// generate slowmem fallback
|
||||
bpi.host_slowmem_pc = GetCurrentFarCodePointer();
|
||||
SwitchToFarCode();
|
||||
|
||||
EmitStoreGuestMemorySlowmem(cbi, address, value, true);
|
||||
|
||||
// return to the block code
|
||||
m_emit->jmp(GetCurrentNearCodePointer());
|
||||
|
||||
SwitchToNearCode();
|
||||
|
||||
m_block->loadstore_backpatch_info.push_back(bpi);
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const Value& value, bool in_far_code)
|
||||
{
|
||||
if (g_settings.cpu_recompiler_memory_exceptions)
|
||||
{
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
Assert(!in_far_code);
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(RegSize_32);
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1890,24 +2180,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
m_emit->jnz(GetCurrentFarCodePointer());
|
||||
|
||||
// store exception path
|
||||
if (!in_far_code)
|
||||
SwitchToFarCode();
|
||||
|
||||
// cause_bits = (result << 2) | BD | cop_n
|
||||
m_emit->shl(GetHostReg32(result.host_reg), 2);
|
||||
m_emit->or_(GetHostReg32(result.host_reg),
|
||||
m_emit->shl(GetHostReg32(result), 2);
|
||||
m_emit->or_(GetHostReg32(result),
|
||||
Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), cbi.is_branch_delay_slot, false,
|
||||
cbi.instruction.cop.cop_n));
|
||||
EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
|
||||
|
||||
EmitExceptionExit();
|
||||
if (!in_far_code)
|
||||
SwitchToNearCode();
|
||||
|
||||
m_register_cache.PopState();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
|
||||
|
||||
switch (value.size)
|
||||
{
|
||||
case RegSize_8:
|
||||
|
@ -1929,6 +2219,24 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
}
|
||||
}
|
||||
|
||||
bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi)
|
||||
{
|
||||
Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem", lbi.host_pc, lbi.guest_pc);
|
||||
|
||||
// turn it into a jump to the slowmem handler
|
||||
Xbyak::CodeGenerator cg(lbi.host_code_size, lbi.host_pc);
|
||||
cg.jmp(lbi.host_slowmem_pc);
|
||||
|
||||
const s32 nops = static_cast<s32>(lbi.host_code_size) -
|
||||
static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(lbi.host_pc)));
|
||||
Assert(nops >= 0);
|
||||
for (s32 i = 0; i < nops; i++)
|
||||
cg.nop();
|
||||
|
||||
JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
|
||||
{
|
||||
const s64 displacement =
|
||||
|
@ -2486,4 +2794,123 @@ void CodeGenerator::EmitBindLabel(LabelType* label)
|
|||
m_emit->L(*label);
|
||||
}
|
||||
|
||||
/// Emits code that places the address `ptr` into the 64-bit form of `host_reg`.
/// A RIP-relative lea is used when the displacement check passes; otherwise the absolute address is moved in.
void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const size_t target = reinterpret_cast<size_t>(ptr);
  const size_t emit_position = reinterpret_cast<size_t>(m_emit->getCurr());
  const s64 displacement = static_cast<s64>(target - emit_position) + 2;

  if (!Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
  {
    // Displacement check failed: load the full 64-bit address as an immediate.
    m_emit->mov(GetHostReg64(host_reg), target);
    return;
  }

  m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]);
}
|
||||
|
||||
/// Emits the dispatcher routine and returns a pointer to the finalized host code.
/// The emitted code keeps the address of g_state in RBP and loops: leave when frame_done is set, dispatch a
/// pending interrupt, refresh the downcount from the head timing event, then repeatedly look up and call the
/// block for the current pc until pending_ticks reaches the downcount, at which point events are run.
CodeBlock::HostCodePointer CodeGenerator::CompileDispatcher()
{
  // Operand for a 32-bit field of the CPU state, addressed relative to RBP.
  const auto state_dword = [this](size_t offset) { return m_emit->dword[m_emit->rbp + offset]; };

  // eax <- downcount of the event at the head of the timing event list (clobbers rax).
  const auto load_head_event_downcount = [this]() {
    EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
    m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
    m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
  };

  m_register_cache.ReserveCallerSavedRegisters();

  // rbp <- &g_state, used as the base for every state access below
  EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);

  Xbyak::Label frame_done_loop;
  Xbyak::Label exit_dispatcher;
  Xbyak::Label no_interrupt;
  Xbyak::Label main_loop;
  Xbyak::Label downcount_hit;

  m_emit->L(frame_done_loop);

  // if frame_done goto exit_dispatcher
  m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
  m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);

  // eax <- sr
  m_emit->mov(m_emit->eax, state_dword(offsetof(State, cop0_regs.sr.bits)));

  // if Iec == 0 then goto no_interrupt
  m_emit->test(m_emit->eax, 1);
  m_emit->jz(no_interrupt);

  // sr & cause
  m_emit->and_(m_emit->eax, state_dword(offsetof(State, cop0_regs.cause.bits)));

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  m_emit->test(m_emit->eax, 0xFF00);
  m_emit->jz(no_interrupt);

  // we have an interrupt
  EmitFunctionCall(nullptr, &DispatchInterrupt);

  // no interrupt or we just serviced it
  m_emit->L(no_interrupt);

  // TimingEvents::UpdateCPUDowncount:
  // eax <- head event->downcount
  // downcount <- eax
  load_head_event_downcount();
  m_emit->mov(state_dword(offsetof(State, downcount)), m_emit->eax);

  // main dispatch loop
  m_emit->align(16);
  m_emit->L(main_loop);

  // eax <- pending_ticks
  m_emit->mov(m_emit->eax, state_dword(offsetof(State, pending_ticks)));

  // while eax < downcount
  m_emit->cmp(m_emit->eax, state_dword(offsetof(State, downcount)));
  m_emit->jge(downcount_hit);

  // time to lookup the block
  // eax <- pc
  m_emit->mov(m_emit->eax, state_dword(offsetof(State, regs.pc)));

  // ebx <- (pc & RAM_MASK) >> 2
  m_emit->mov(m_emit->ebx, m_emit->eax);
  m_emit->and_(m_emit->ebx, Bus::RAM_MASK);
  m_emit->shr(m_emit->ebx, 2);

  // ecx <- ((pc & BIOS_MASK) >> 2) + FAST_MAP_RAM_SLOT_COUNT
  m_emit->mov(m_emit->ecx, m_emit->eax);
  m_emit->and_(m_emit->ecx, Bus::BIOS_MASK);
  m_emit->shr(m_emit->ecx, 2);
  m_emit->add(m_emit->ecx, FAST_MAP_RAM_SLOT_COUNT);

  // current_instruction_pc <- pc (eax)
  m_emit->mov(state_dword(offsetof(State, current_instruction_pc)), m_emit->eax);

  // if ((eax (pc) & PHYSICAL_MEMORY_ADDRESS_MASK) >= BIOS_BASE) { use ecx as index }
  m_emit->and_(m_emit->eax, PHYSICAL_MEMORY_ADDRESS_MASK);
  m_emit->cmp(m_emit->eax, Bus::BIOS_BASE);
  m_emit->cmovge(m_emit->ebx, m_emit->ecx);

  // ebx contains our index, rax <- fast_map[ebx * 8], rax(), continue
  EmitLoadGlobalAddress(Xbyak::Operand::RAX, CodeCache::GetFastMapPointer());
  m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax + m_emit->rbx * 8]);
  m_emit->call(m_emit->rax);
  m_emit->jmp(main_loop);

  // end while
  m_emit->L(downcount_hit);

  // check events then for frame done
  load_head_event_downcount();
  m_emit->cmp(m_emit->eax, state_dword(offsetof(State, pending_ticks)));
  m_emit->jg(frame_done_loop);
  EmitFunctionCall(nullptr, &TimingEvents::RunEvents);
  m_emit->jmp(frame_done_loop);

  // all done
  m_emit->L(exit_dispatcher);
  m_register_cache.PopCalleeSavedRegisters(true);
  m_emit->ret();

  CodeBlock::HostCodePointer dispatcher_ptr;
  u32 dispatcher_size;
  FinalizeBlock(&dispatcher_ptr, &dispatcher_size);
  Log_InfoPrintf("Dispatcher is %u bytes at %p", dispatcher_size, dispatcher_ptr);
  return dispatcher_ptr;
}
|
||||
|
||||
} // namespace CPU::Recompiler
|
||||
|
|
|
@ -351,6 +351,33 @@ u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
|
|||
return count;
|
||||
}
|
||||
|
||||
void RegisterCache::ReserveCallerSavedRegisters()
|
||||
{
|
||||
for (u32 reg = 0; reg < HostReg_Count; reg++)
|
||||
{
|
||||
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
|
||||
HostRegState::CalleeSaved)
|
||||
{
|
||||
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
|
||||
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
|
||||
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
|
||||
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCache::AssumeCalleeSavedRegistersAreSaved()
|
||||
{
|
||||
for (u32 i = 0; i < HostReg_Count; i++)
|
||||
{
|
||||
if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
|
||||
HostRegState::CalleeSaved)
|
||||
{
|
||||
m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegisterCache::PushState()
|
||||
{
|
||||
// need to copy this manually because of the load delay values
|
||||
|
|
|
@ -248,6 +248,12 @@ public:
|
|||
/// Restore callee-saved registers. Call at the end of the function.
|
||||
u32 PopCalleeSavedRegisters(bool commit);
|
||||
|
||||
/// Pushes every callee-saved register that has not been saved yet and marks it as saved, enabling later use without stack pushes.
|
||||
void ReserveCallerSavedRegisters();
|
||||
|
||||
/// Removes the callee-saved register flag from all registers. Call when compiling code blocks.
|
||||
void AssumeCalleeSavedRegistersAreSaved();
|
||||
|
||||
/// Pushes the register allocator state, use when entering branched code.
|
||||
void PushState();
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ void UncheckedWriteMemoryByte(u32 address, u8 value);
|
|||
void UncheckedWriteMemoryHalfWord(u32 address, u16 value);
|
||||
void UncheckedWriteMemoryWord(u32 address, u32 value);
|
||||
|
||||
void UpdateFastmemMapping();
|
||||
|
||||
} // namespace Recompiler::Thunks
|
||||
|
||||
|
|
|
@ -127,6 +127,16 @@ constexpr bool SHIFTS_ARE_IMPLICITLY_MASKED = false;
|
|||
|
||||
#endif
|
||||
|
||||
struct LoadStoreBackpatchInfo
|
||||
{
|
||||
void* host_pc; // pointer to instruction which will fault
|
||||
void* host_slowmem_pc; // pointer to slowmem callback code
|
||||
u32 host_code_size; // size of the fastmem load as well as the add for cycles
|
||||
HostReg address_host_reg; // register containing the guest address to load/store
|
||||
HostReg value_host_reg; // register containing the source/destination
|
||||
PhysicalMemoryAddress guest_pc;
|
||||
};
|
||||
|
||||
} // namespace Recompiler
|
||||
|
||||
} // namespace CPU
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "common/log.h"
|
||||
#include "common/state_wrapper.h"
|
||||
#include "common/string_util.h"
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_core.h"
|
||||
#include "gpu.h"
|
||||
#include "interrupt_controller.h"
|
||||
|
@ -499,7 +500,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
|
|||
|
||||
const u32 terminator = UINT32_C(0xFFFFFF);
|
||||
std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator));
|
||||
Bus::InvalidateCodePages(address, word_count);
|
||||
CPU::CodeCache::InvalidateCodePages(address, word_count);
|
||||
return Bus::GetDMARAMTickCount(word_count);
|
||||
}
|
||||
|
||||
|
@ -547,6 +548,6 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
|
|||
}
|
||||
}
|
||||
|
||||
Bus::InvalidateCodePages(address, word_count);
|
||||
CPU::CodeCache::InvalidateCodePages(address, word_count);
|
||||
return Bus::GetDMARAMTickCount(word_count);
|
||||
}
|
||||
|
|
|
@ -367,6 +367,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si)
|
|||
si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
si.SetBoolValue("CPU", "ICache", false);
|
||||
si.SetBoolValue("CPU", "Fastmem", true);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER));
|
||||
si.SetIntValue("GPU", "ResolutionScale", 1);
|
||||
|
@ -513,12 +514,13 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
|
|||
if (g_settings.emulation_speed != old_settings.emulation_speed)
|
||||
System::UpdateThrottlePeriod();
|
||||
|
||||
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode)
|
||||
if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode ||
|
||||
g_settings.cpu_fastmem != old_settings.cpu_fastmem)
|
||||
{
|
||||
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode.",
|
||||
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode));
|
||||
CPU::CodeCache::SetUseRecompiler(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
CPU::CodeCache::Flush();
|
||||
AddFormattedOSDMessage(5.0f, "Switching to %s CPU execution mode%s.",
|
||||
Settings::GetCPUExecutionModeName(g_settings.cpu_execution_mode),
|
||||
g_settings.cpu_fastmem ? " (fastmem)" : "");
|
||||
CPU::CodeCache::Reinitialize();
|
||||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
|
|
|
@ -96,6 +96,7 @@ void Settings::Load(SettingsInterface& si)
|
|||
.value_or(DEFAULT_CPU_EXECUTION_MODE);
|
||||
cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false);
|
||||
cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false);
|
||||
cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true);
|
||||
|
||||
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
|
||||
.value_or(DEFAULT_GPU_RENDERER);
|
||||
|
@ -217,6 +218,7 @@ void Settings::Save(SettingsInterface& si) const
|
|||
si.SetStringValue("CPU", "ExecutionMode", GetCPUExecutionModeName(cpu_execution_mode));
|
||||
si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions);
|
||||
si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache);
|
||||
si.SetBoolValue("CPU", "Fastmem", cpu_fastmem);
|
||||
|
||||
si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer));
|
||||
si.SetStringValue("GPU", "Adapter", gpu_adapter.c_str());
|
||||
|
|
|
@ -73,6 +73,7 @@ struct Settings
|
|||
bool cpu_recompiler_memory_exceptions = false;
|
||||
bool cpu_recompiler_icache = false;
|
||||
bool cpu_thread = true;
|
||||
bool cpu_fastmem = true;
|
||||
|
||||
float emulation_speed = 1.0f;
|
||||
bool speed_limiter_enabled = true;
|
||||
|
@ -173,6 +174,11 @@ struct Settings
|
|||
return gpu_pgxp_enable ? (gpu_pgxp_cpu ? PGXPMode::CPU : PGXPMode::Memory) : PGXPMode::Disabled;
|
||||
}
|
||||
|
||||
/// Returns true only when fastmem is enabled, the CPU execution mode is the recompiler, and recompiler memory
/// exceptions are disabled.
ALWAYS_INLINE bool IsUsingFastmem() const
{
  if (!cpu_fastmem)
    return false;

  if (cpu_execution_mode != CPUExecutionMode::Recompiler)
    return false;

  return !cpu_recompiler_memory_exceptions;
}
|
||||
|
||||
bool HasAnyPerGameMemoryCards() const;
|
||||
|
||||
enum : u32
|
||||
|
|
|
@ -727,12 +727,14 @@ bool Initialize(bool force_software_renderer)
|
|||
TimingEvents::Initialize();
|
||||
|
||||
CPU::Initialize();
|
||||
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
|
||||
Bus::Initialize();
|
||||
|
||||
if (!Bus::Initialize())
|
||||
return false;
|
||||
|
||||
CPU::CodeCache::Initialize();
|
||||
|
||||
g_gpu.Initialize();
|
||||
g_dma.Initialize();
|
||||
|
||||
g_interrupt_controller.Initialize();
|
||||
|
||||
g_cdrom.Initialize();
|
||||
|
|
|
@ -13,7 +13,6 @@ static TimingEvent* s_active_events_tail;
|
|||
static TimingEvent* s_current_event = nullptr;
|
||||
static u32 s_active_event_count = 0;
|
||||
static u32 s_global_tick_counter = 0;
|
||||
static u32 s_last_event_run_time = 0;
|
||||
|
||||
u32 GetGlobalTickCounter()
|
||||
{
|
||||
|
@ -28,7 +27,6 @@ void Initialize()
|
|||
void Reset()
|
||||
{
|
||||
s_global_tick_counter = 0;
|
||||
s_last_event_run_time = 0;
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
|
@ -53,6 +51,11 @@ void UpdateCPUDowncount()
|
|||
CPU::g_state.downcount = s_active_events_head->GetDowncount();
|
||||
}
|
||||
|
||||
/// Returns the address of the variable holding the head of the active event list, so callers can re-read the
/// current head through it.
TimingEvent** GetHeadEventPtr()
{
  TimingEvent** const head_slot = &s_active_events_head;
  return head_slot;
}
|
||||
|
||||
static void SortEvent(TimingEvent* event)
|
||||
{
|
||||
const TickCount event_downcount = event->m_downcount;
|
||||
|
@ -255,7 +258,7 @@ void RunEvents()
|
|||
{
|
||||
DebugAssert(!s_current_event);
|
||||
|
||||
TickCount pending_ticks = (s_global_tick_counter + CPU::GetPendingTicks()) - s_last_event_run_time;
|
||||
TickCount pending_ticks = CPU::GetPendingTicks();
|
||||
CPU::ResetPendingTicks();
|
||||
while (pending_ticks > 0)
|
||||
{
|
||||
|
@ -291,7 +294,6 @@ void RunEvents()
|
|||
}
|
||||
}
|
||||
|
||||
s_last_event_run_time = s_global_tick_counter;
|
||||
s_current_event = nullptr;
|
||||
UpdateCPUDowncount();
|
||||
}
|
||||
|
@ -333,8 +335,6 @@ bool DoState(StateWrapper& sw)
|
|||
event->m_interval = interval;
|
||||
}
|
||||
|
||||
sw.Do(&s_last_event_run_time);
|
||||
|
||||
Log_DevPrintf("Loaded %u events from save state.", event_count);
|
||||
SortEvents();
|
||||
}
|
||||
|
@ -352,8 +352,6 @@ bool DoState(StateWrapper& sw)
|
|||
sw.Do(&event->m_interval);
|
||||
}
|
||||
|
||||
sw.Do(&s_last_event_run_time);
|
||||
|
||||
Log_DevPrintf("Wrote %u events to save state.", s_active_event_count);
|
||||
}
|
||||
|
||||
|
|
|
@ -88,6 +88,8 @@ void RunEvents();
|
|||
|
||||
void UpdateCPUDowncount();
|
||||
|
||||
TimingEvent** GetHeadEventPtr();
|
||||
|
||||
|
||||
|
||||
} // namespace TimingEventManager
|
|
@ -129,6 +129,6 @@ enum : u32
|
|||
|
||||
enum : u32
|
||||
{
|
||||
CPU_CODE_CACHE_PAGE_SIZE = 1024,
|
||||
CPU_CODE_CACHE_PAGE_SIZE = 4096,
|
||||
CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue