New Recompiler

Stenzek 2023-08-15 23:14:40 +10:00
parent 5166a946da
commit d820c971c6
27 changed files with 11858 additions and 30 deletions


@ -23,6 +23,7 @@ endif()
# Renderer options.
option(ENABLE_OPENGL "Build with OpenGL renderer" ON)
option(ENABLE_VULKAN "Build with Vulkan renderer" ON)
option(ENABLE_NEWREC "Build with experimental new dynarec (needed for RISC-V)" ON)
# Global options.
if(NOT ANDROID)


@ -120,6 +120,14 @@ set(RECOMPILER_SRCS
cpu_recompiler_types.h
)
set(NEWREC_SOURCES
cpu_newrec.cpp
cpu_newrec.h
cpu_newrec_compiler.cpp
cpu_newrec_compiler.h
cpu_newrec_private.h
)
target_precompile_headers(core PRIVATE "pch.h")
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
@ -130,9 +138,16 @@ if(${CPU_ARCH} STREQUAL "x64")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak")
target_compile_definitions(core PUBLIC "XBYAK_NO_EXCEPTION=1" "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
cpu_recompiler_code_generator_x64.cpp
)
message("Building x64 recompiler")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_x64.cpp
cpu_newrec_compiler_x64.h
)
message("Building x64 newrec")
endif()
elseif(${CPU_ARCH} STREQUAL "aarch32")
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
@ -147,6 +162,25 @@ elseif(${CPU_ARCH} STREQUAL "aarch64")
)
target_link_libraries(core PUBLIC vixl)
message("Building AArch64 recompiler")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_aarch64.cpp
cpu_newrec_compiler_aarch64.h
)
message("Building AArch64 newrec")
endif()
elseif(${CPU_ARCH} STREQUAL "riscv64")
target_compile_definitions(core PUBLIC "WITH_MMAP_FASTMEM=1")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_riscv64.cpp
cpu_newrec_compiler_riscv64.h
)
target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas)
message("Building RISC-V 64-bit newrec")
endif()
else()
message("Not building recompiler")
endif()


@ -8,6 +8,7 @@
<PreprocessorDefinitions Condition="('$(Platform)'!='ARM64')">ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_NEWREC=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'!='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\rainterface</AdditionalIncludeDirectories>


@ -13,6 +13,14 @@
<ClCompile Include="cpu_core.cpp" />
<ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_newrec.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_newrec_compiler_x64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_recompiler_code_generator.cpp">
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
</ClCompile>
@ -89,6 +97,15 @@
<ClInclude Include="cpu_core_private.h" />
<ClInclude Include="cpu_disasm.h" />
<ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_newrec.h" />
<ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_newrec_compiler_x64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_newrec_private.h" />
<ClInclude Include="cpu_recompiler_code_generator.h">
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
</ClInclude>
@ -176,6 +193,9 @@
<ProjectReference Include="..\..\dep\zstd\zstd.vcxproj">
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
</ProjectReference>
<ProjectReference Include="..\..\dep\zydis\zydis.vcxproj">
<Project>{c51a346a-86b2-46df-9bb3-d0aa7e5d8699}</Project>
</ProjectReference>
<ProjectReference Include="..\scmversion\scmversion.vcxproj">
<Project>{075ced82-6a20-46df-94c7-9624ac9ddbeb}</Project>
</ProjectReference>


@ -60,6 +60,10 @@
<ClCompile Include="hotkeys.cpp" />
<ClCompile Include="gpu_shadergen.cpp" />
<ClCompile Include="pch.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec.cpp" />
<ClCompile Include="cpu_newrec_compiler_x64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="types.h" />
@ -124,5 +128,10 @@
<ClInclude Include="shader_cache_version.h" />
<ClInclude Include="gpu_shadergen.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="cpu_newrec.h" />
<ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_private.h" />
<ClInclude Include="cpu_newrec_compiler_x64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h" />
</ItemGroup>
</Project>


@ -8,6 +8,7 @@
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_newrec_private.h"
#include "cpu_recompiler_types.h"
#include "settings.h"
#include "system.h"
@ -29,7 +30,7 @@ static constexpr u32 RECOMPILE_FRAMES_TO_FALL_BACK_TO_INTERPRETER = 100;
static constexpr u32 RECOMPILE_COUNT_TO_FALL_BACK_TO_INTERPRETER = 20;
static constexpr u32 INVALIDATE_THRESHOLD_TO_DISABLE_LINKING = 10;
#ifdef ENABLE_RECOMPILER
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
// Currently remapping the code buffer doesn't work in macOS or Haiku.
#if !defined(__HAIKU__) && !defined(__APPLE__)
@ -247,8 +248,8 @@ void Initialize()
{
Assert(s_blocks.empty());
#ifdef ENABLE_RECOMPILER
if (g_settings.IsUsingRecompiler())
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
if (g_settings.IsUsingAnyRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
const bool has_buffer = s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage),
@ -263,7 +264,15 @@ void Initialize()
}
#endif
AllocateFastMap();
#ifdef ENABLE_NEWREC
if (g_settings.IsUsingNewRec())
{
if (!CPU::NewRec::Initialize())
Panic("Failed to initialize newrec");
return;
}
#endif
#ifdef ENABLE_RECOMPILER
if (g_settings.IsUsingRecompiler())
@ -297,7 +306,12 @@ void ClearState()
void Shutdown()
{
#ifdef ENABLE_NEWREC
NewRec::Shutdown();
#endif
ClearState();
#ifdef ENABLE_RECOMPILER
ShutdownFastmem();
FreeFastMap();
@ -455,6 +469,12 @@ FastMapTable* GetFastMapPointer()
break;
#endif
#ifdef ENABLE_NEWREC
case CPUExecutionMode::NewRec:
CPU::NewRec::Execute();
break;
#endif
default:
{
if (g_settings.gpu_pgxp_enable)
@ -473,7 +493,7 @@ FastMapTable* GetFastMapPointer()
}
}
#if defined(ENABLE_RECOMPILER)
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
JitCodeBuffer& GetCodeBuffer()
{
@ -484,16 +504,18 @@ JitCodeBuffer& GetCodeBuffer()
void Reinitialize()
{
ClearState();
#ifdef ENABLE_NEWREC
NewRec::Shutdown();
#endif
#ifdef ENABLE_RECOMPILER
ShutdownFastmem();
#endif
#if defined(ENABLE_RECOMPILER)
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
if (g_settings.IsUsingAnyRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
@ -507,6 +529,14 @@ void Reinitialize()
}
#endif
#ifdef ENABLE_NEWREC
if (g_settings.IsUsingNewRec())
{
if (!CPU::NewRec::Initialize())
Panic("Failed to reinitialize NewRec");
}
#endif
#ifdef ENABLE_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
@ -522,6 +552,15 @@ void Reinitialize()
void Flush()
{
#ifdef ENABLE_NEWREC
if (g_settings.IsUsingNewRec())
{
s_code_buffer.Reset();
NewRec::Reset();
return;
}
#endif
ClearState();
#ifdef ENABLE_RECOMPILER
if (g_settings.IsUsingRecompiler())
@ -929,6 +968,14 @@ static void InvalidateBlock(CodeBlock* block, bool allow_frame_invalidation)
void InvalidateBlocksWithPageIndex(u32 page_index)
{
#ifdef ENABLE_NEWREC
if (g_settings.IsUsingNewRec())
{
NewRec::InvalidateBlocksWithPageNumber(page_index);
return;
}
#endif
DebugAssert(page_index < Bus::RAM_8MB_CODE_PAGE_COUNT);
auto& blocks = m_ram_block_map[page_index];
for (CodeBlock* block : blocks)
@ -941,6 +988,14 @@ void InvalidateBlocksWithPageIndex(u32 page_index)
void InvalidateAll()
{
#ifdef ENABLE_NEWREC
if (g_settings.IsUsingNewRec())
{
NewRec::InvalidateAllRAMBlocks();
return;
}
#endif
for (auto& it : s_blocks)
{
CodeBlock* block = it.second;


@ -134,7 +134,7 @@ using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer)
FastMapTable* GetFastMapPointer();
#endif
#if defined(ENABLE_RECOMPILER)
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
JitCodeBuffer& GetCodeBuffer();
#endif


@ -2230,6 +2230,7 @@ void CPU::Execute()
{
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
case CPUExecutionMode::NewRec:
CodeCache::Execute();
break;

953 src/core/cpu_newrec.cpp Normal file

@ -0,0 +1,953 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "cpu_newrec.h"
#include "bus.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "cpu_code_cache.h"
#include "cpu_core_private.h"
#include "cpu_newrec_compiler.h"
#include "cpu_newrec_private.h"
#include "cpu_types.h"
#include "settings.h"
#include "system.h"
#include "util/page_fault_handler.h"
#include <unordered_map>
#include <unordered_set>
#include <vector>
Log_SetChannel(CPU::NewRec);
namespace CPU::NewRec {
using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);
static void InvalidCodeFunction();
static void AllocateLUTs();
static void ResetLUTs();
static void InvalidateBlock(Block* block, BlockState new_state);
static void ClearBlocks();
static void CompileASMFunctions();
static u32 ReadBlockInstructions(u32 start_pc);
static void FillBlockRegInfo(Block* block);
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
static void AddBlockToPageList(Block* block);
static void BacklinkBlocks(u32 pc, const void* dst);
static void UnlinkBlockExits(Block* block);
static bool InitializeFastmem();
static void ShutdownFastmem();
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
// Fast map provides lookup from PC to function
// Function pointers are offset so that you don't need to subtract
CodeLUTArray g_fast_map;
static BlockLUTArray s_block_map;
static std::unique_ptr<const void*[]> s_lut_code_pointers;
static std::unique_ptr<Block*[]> s_lut_block_pointers;
static PageProtectionArray s_page_protection = {};
static std::vector<Block*> s_blocks;
static BlockLinkMap s_block_links;
static bool s_lut_initialized = false;
// for compiling
static std::vector<Instruction> s_block_instructions;
// fastmem stuff
static std::unordered_map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
static std::unordered_set<u32> s_fastmem_faulting_pcs;
NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
const void* g_compile_or_revalidate_block;
const void* g_discard_and_recompile_block;
const void* g_check_events_and_dispatch;
const void* g_dispatcher;
} // namespace CPU::NewRec
namespace CPU::NewRec {
static constexpr u32 GetLUTTableCount(u32 start, u32 end)
{
return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
}
static constexpr CPU::NewRec::LUTRangeList GetLUTRanges()
{
const CPU::NewRec::LUTRangeList ranges = {{
{0x00000000, 0x00800000}, // RAM
{0x1F000000, 0x1F800000}, // EXP1
{0x1FC00000, 0x1FC80000}, // BIOS
{0x80000000, 0x80800000}, // RAM
{0x9F000000, 0x9F800000}, // EXP1
{0x9FC00000, 0x9FC80000}, // BIOS
{0xA0000000, 0xA0800000}, // RAM
{0xBF000000, 0xBF800000}, // EXP1
{0xBFC00000, 0xBFC80000} // BIOS
}};
return ranges;
}
static constexpr u32 GetLUTSlotCount(bool include_unreachable)
{
u32 tables = include_unreachable ? 1 : 0; // unreachable table
for (const auto& [start, end] : GetLUTRanges())
tables += GetLUTTableCount(start, end);
return tables * LUT_TABLE_SIZE;
}
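// For example, assuming LUT_TABLE_SHIFT is 16 (consistent with the (pc & 0xFFFF)
// indexing used below), the 8MB RAM mapping at 0x00000000..0x00800000 contributes
// ((0x00800000 >> 16) - (0x00000000 >> 16)) + 1 = 129 tables, and GetLUTSlotCount()
// sums this over all nine ranges, plus one extra table for unreachable addresses
// when include_unreachable is set.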
} // namespace CPU::NewRec
CPU::NewRec::CodeLUT CPU::NewRec::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
if constexpr (sizeof(void*) == 8)
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
else
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
}
CPU::NewRec::CodeLUT CPU::NewRec::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
if constexpr (sizeof(void*) == 8)
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
else
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
}
CPU::NewRec::CodeLUT CPU::NewRec::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
{
u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
if constexpr (sizeof(void*) == 8)
return reinterpret_cast<const void**>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
else
return reinterpret_cast<const void**>(fake_byte_ptr + pc);
}
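// Illustrative, self-contained sketch (not part of this commit; assumes a 64-bit
// host) of why the encode/offset pair above works: the table pointer stored in
// g_fast_map is pre-biased by -(slot << 17), so a lookup only needs to add
// (pc << 1) -- no masking of pc and no subtraction of the table's base PC. With
// 8-byte entries and 4-byte instructions, the byte offset of pc's entry inside
// its 64KB window is ((pc & 0xFFFF) >> 2) * 8 == (pc & 0xFFFF) << 1.
#include <cassert>
#include <cstdint>
#include <cstdio>
int main()
{
  constexpr uint32_t LUT_TABLE_SHIFT = 16;
  static const void* table[0x10000 / 4] = {}; // one entry per word in a 64KB window
  const uint32_t pc = 0x80010008u;             // some code address
  const uint32_t slot = pc >> LUT_TABLE_SHIFT; // 0x8001
  table[(pc & 0xFFFFu) >> 2] = reinterpret_cast<const void*>(0x1234);
  // EncodeCodeLUTPointer: bias the table pointer down by (slot << 17).
  const uintptr_t encoded = reinterpret_cast<uintptr_t>(table) - (static_cast<uintptr_t>(slot) << 17);
  // OffsetCodeLUTPointer: a single add of (pc << 1) lands on the matching entry.
  const void** entry = reinterpret_cast<const void**>(encoded + (static_cast<uintptr_t>(pc) << 1));
  assert(*entry == table[(pc & 0xFFFFu) >> 2]);
  std::printf("fast-map lookup ok\n");
  return 0;
}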
void CPU::NewRec::AllocateLUTs()
{
constexpr u32 num_code_slots = GetLUTSlotCount(true);
constexpr u32 num_block_slots = GetLUTSlotCount(false);
Assert(!s_lut_code_pointers && !s_lut_block_pointers);
s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);
CodeLUT code_table_ptr = s_lut_code_pointers.get();
Block** block_table_ptr = s_lut_block_pointers.get();
CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
Block** const block_table_ptr_end = block_table_ptr + num_block_slots;
// Make the unreachable table jump to the invalid code callback.
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
code_table_ptr[i] = reinterpret_cast<const void*>(InvalidCodeFunction);
// Mark everything as unreachable to begin with.
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
{
g_fast_map[i] = EncodeCodeLUTPointer(i, code_table_ptr);
s_block_map[i] = nullptr;
}
code_table_ptr += LUT_TABLE_SIZE;
// Allocate ranges.
for (const auto& [start, end] : GetLUTRanges())
{
const u32 start_slot = start >> LUT_TABLE_SHIFT;
const u32 count = GetLUTTableCount(start, end);
for (u32 i = 0; i < count; i++)
{
const u32 slot = start_slot + i;
g_fast_map[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
code_table_ptr += LUT_TABLE_SIZE;
s_block_map[slot] = block_table_ptr;
block_table_ptr += LUT_TABLE_SIZE;
}
}
Assert(code_table_ptr == code_table_ptr_end);
Assert(block_table_ptr == block_table_ptr_end);
}
void CPU::NewRec::ResetLUTs()
{
if (!s_lut_code_pointers)
return;
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
{
CodeLUT ptr = DecodeCodeLUTPointer(i, g_fast_map[i]);
if (ptr == s_lut_code_pointers.get())
continue;
for (u32 j = 0; j < LUT_TABLE_SIZE; j++)
ptr[j] = g_compile_or_revalidate_block;
}
}
void CPU::NewRec::SetFastMap(u32 pc, const void* function)
{
if (!s_lut_code_pointers)
return;
const u32 table = pc >> LUT_TABLE_SHIFT;
CodeLUT encoded_ptr = g_fast_map[table];
#ifdef _DEBUG
const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
#endif
*OffsetCodeLUTPointer(encoded_ptr, pc) = function;
}
CPU::NewRec::Block* CPU::NewRec::LookupBlock(u32 pc)
{
const u32 table = pc >> LUT_TABLE_SHIFT;
if (!s_block_map[table])
return nullptr;
const u32 idx = (pc & 0xFFFF) >> 2;
return s_block_map[table][idx];
}
CPU::NewRec::Block* CPU::NewRec::CreateBlock(u32 pc)
{
const u32 size = ReadBlockInstructions(pc);
if (size == 0)
{
Log_ErrorPrintf("Cannot compile block at pc %08X", pc);
return nullptr;
}
const u32 table = pc >> LUT_TABLE_SHIFT;
Assert(s_block_map[table]);
const u32 idx = (pc & 0xFFFF) >> 2;
Block* block = s_block_map[table][idx];
if (block)
{
// shouldn't be in the page list.. since we should come here after invalidating
Assert(!block->next_block_in_page);
// if it has the same number of instructions, we can reuse it
if (block->size != size)
{
// this sucks.. hopefully won't happen very often
// TODO: allocate max size, allow shrink but not grow
auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
Assert(it != s_blocks.end());
s_blocks.erase(it);
std::free(block);
block = nullptr;
}
}
if (!block)
{
block =
static_cast<Block*>(std::malloc(sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size)));
Assert(block);
s_blocks.push_back(block);
}
block->pc = pc;
block->size = size;
block->host_code = nullptr;
block->next_block_in_page = nullptr;
block->num_exit_links = 0;
block->state = BlockState::Valid;
std::memcpy(block->Instructions(), s_block_instructions.data(), sizeof(Instruction) * size);
s_block_map[table][idx] = block;
FillBlockRegInfo(block);
// add it to the tracking list for its page
AddBlockToPageList(block);
return block;
}
bool CPU::NewRec::RevalidateBlock(Block* block)
{
DebugAssert(block->state != BlockState::Valid);
DebugAssert(BlockInRAM(block->pc));
if (block->state == BlockState::NeedsRecompile)
return false;
// blocks shouldn't be wrapping..
const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);
// can just do a straight memcmp..
if (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) != 0)
{
// changed, needs recompiling
Log_DebugPrintf("Block at PC %08X has changed and needs recompiling", block->pc);
return false;
}
block->state = BlockState::Valid;
AddBlockToPageList(block);
return true;
}
void CPU::NewRec::CompileOrRevalidateBlock(u32 start_pc)
{
// TODO: this doesn't currently handle when the cache overflows...
Block* block = LookupBlock(start_pc);
if (block)
{
// we should only be here if the block got invalidated
DebugAssert(block->state != BlockState::Valid);
if (RevalidateBlock(block))
{
SetFastMap(start_pc, block->host_code);
BacklinkBlocks(start_pc, block->host_code);
return;
}
// remove outward links from this block, since we're recompiling it
UnlinkBlockExits(block);
}
block = CreateBlock(start_pc);
if (!block)
Panic("Failed to create block, TODO fallback to interpreter");
block->host_code = g_compiler->CompileBlock(block);
if (!block->host_code)
{
// block failed to compile
// TODO: this shouldn't backlink
block->host_code = reinterpret_cast<const void*>(&CPU::CodeCache::InterpretUncachedBlock<PGXPMode::Disabled>);
Panic("Block failed compilation");
}
SetFastMap(start_pc, block->host_code);
BacklinkBlocks(start_pc, block->host_code);
}
void CPU::NewRec::DiscardAndRecompileBlock(u32 start_pc)
{
Log_DevPrintf("Discard block %08X with manual protection", start_pc);
Block* block = LookupBlock(start_pc);
DebugAssert(block && block->state == BlockState::Valid);
InvalidateBlock(block, BlockState::NeedsRecompile);
CompileOrRevalidateBlock(start_pc);
}
void CPU::NewRec::AddBlockToPageList(Block* block)
{
if (!BlockInRAM(block->pc) || block->next_block_in_page)
return;
// TODO: what about blocks which span more than one page?
const u32 page_idx = Bus::GetRAMCodePageIndex(block->pc);
PageProtectionInfo& entry = s_page_protection[page_idx];
if (entry.mode != PageProtectionMode::WriteProtected)
return;
Bus::SetRAMCodePage(page_idx);
if (entry.last_block_in_page)
{
entry.last_block_in_page->next_block_in_page = block;
entry.last_block_in_page = block;
}
else
{
entry.first_block_in_page = block;
entry.last_block_in_page = block;
}
}
void CPU::NewRec::InvalidateBlocksWithPageNumber(u32 index)
{
DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
Bus::ClearRAMCodePage(index);
BlockState new_block_state = BlockState::Invalidated;
PageProtectionInfo& ppi = s_page_protection[index];
const u32 frame_number = System::GetFrameNumber();
const u32 frame_delta = frame_number - ppi.invalidate_frame;
ppi.invalidate_count++;
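// Four or more invalidations of the same page within a ten-frame window switch the
// page from write protection to manual protection, where each compiled block
// re-checks its own code against a shadow copy before executing.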
if (frame_delta >= 10)
{
ppi.invalidate_count = 1;
ppi.invalidate_frame = frame_number;
}
else if (ppi.invalidate_count > 3)
{
Log_DevPrintf("%u invalidations to page %u in %u frames, switching to manual protection", ppi.invalidate_count,
index, frame_delta);
ppi.mode = PageProtectionMode::ManualCheck;
new_block_state = BlockState::NeedsRecompile;
}
Block* block = ppi.first_block_in_page;
while (block)
{
InvalidateBlock(block, new_block_state);
Block* next_block = block->next_block_in_page;
block->next_block_in_page = nullptr;
block = next_block;
}
ppi.first_block_in_page = nullptr;
ppi.last_block_in_page = nullptr;
}
CPU::NewRec::PageProtectionMode CPU::NewRec::GetProtectionModeForBlock(Block* block)
{
if (!BlockInRAM(block->pc))
return PageProtectionMode::Unprotected;
const u32 page_idx = Bus::GetRAMCodePageIndex(block->pc);
const PageProtectionInfo& ppi = s_page_protection[page_idx];
return ppi.mode;
}
u32 CPU::NewRec::CreateBlockLink(Block* block, void* code, u32 newpc)
{
const void* dst = g_dispatcher;
if (g_settings.cpu_recompiler_block_linking)
{
const Block* next_block = LookupBlock(newpc);
dst =
(next_block && next_block->state == BlockState::Valid) ? next_block->host_code : g_compile_or_revalidate_block;
BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
block->exit_links[block->num_exit_links++] = iter;
}
Log_DebugPrintf("Linking %p with dst pc %08X to %p%s", code, newpc, dst,
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
return EmitJump(code, dst, false);
}
void CPU::NewRec::BacklinkBlocks(u32 pc, const void* dst)
{
if (!g_settings.cpu_recompiler_block_linking)
return;
const auto link_range = s_block_links.equal_range(pc);
for (auto it = link_range.first; it != link_range.second; ++it)
{
Log_DebugPrintf("Backlinking %p with dst pc %08X to %p%s", it->second, pc, dst,
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
EmitJump(it->second, dst, true);
}
}
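// Illustrative, self-contained sketch (not part of this commit) of the linking
// bookkeeping above: CreateBlockLink() records every emitted exit jump in a
// multimap keyed by the target PC, so that when that PC is later compiled (or
// reverted to the compile stub), BacklinkBlocks() can re-point every recorded
// jump site in one pass.
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
int main()
{
  std::multimap<uint32_t, std::string*> links; // target pc -> recorded jump sites
  std::string site_a = "jmp compile_stub";     // stand-ins for emitted machine code
  std::string site_b = "jmp compile_stub";
  // Two blocks exit to PC 0x80001000 before it has been compiled.
  links.emplace(0x80001000u, &site_a);
  links.emplace(0x80001000u, &site_b);
  // The target gets compiled; re-patch every recorded exit, as BacklinkBlocks() does.
  const auto range = links.equal_range(0x80001000u);
  for (auto it = range.first; it != range.second; ++it)
    *it->second = "jmp block_80001000";
  std::printf("%s / %s\n", site_a.c_str(), site_b.c_str());
  return 0;
}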
void CPU::NewRec::UnlinkBlockExits(Block* block)
{
for (u32 i = 0; i < block->num_exit_links; i++)
s_block_links.erase(block->exit_links[i]);
block->num_exit_links = 0;
}
void CPU::NewRec::InvalidCodeFunction()
{
Panic("fixme");
}
void CPU::NewRec::CompileASMFunctions()
{
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
DebugAssert(buffer.GetTotalUsed() == 0);
const u32 asm_size = CompileASMFunctions(buffer.GetFreeCodePointer(), buffer.GetFreeCodeSpace());
Log_ProfilePrintf("ASM functions generated %u bytes of host code", asm_size);
buffer.CommitCode(asm_size);
}
bool CPU::NewRec::Initialize()
{
if (!s_lut_initialized)
{
s_lut_initialized = true;
AllocateLUTs();
}
CompileASMFunctions();
ResetLUTs();
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
return false;
return true;
}
void CPU::NewRec::Shutdown()
{
if (!s_lut_initialized)
return;
ClearBlocks();
ShutdownFastmem();
}
[[noreturn]] void CPU::NewRec::Execute()
{
g_enter_recompiler();
}
void CPU::NewRec::InvalidateBlock(Block* block, BlockState new_state)
{
if (block->state == BlockState::Valid)
{
SetFastMap(block->pc, g_compile_or_revalidate_block);
BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
}
block->state = new_state;
}
void CPU::NewRec::InvalidateAllRAMBlocks()
{
// TODO: maybe combine the backlink into one big instruction flush cache?
for (Block* block : s_blocks)
{
if (BlockInRAM(block->pc))
InvalidateBlock(block, BlockState::Invalidated);
}
}
void CPU::NewRec::ClearBlocks()
{
for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
{
PageProtectionInfo& ppi = s_page_protection[i];
if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
Bus::ClearRAMCodePage(i);
ppi = {};
}
s_fastmem_backpatch_info.clear();
s_fastmem_faulting_pcs.clear();
s_block_links.clear();
for (Block* block : s_blocks)
std::free(block);
s_blocks.clear();
std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
}
void CPU::NewRec::Reset()
{
ClearBlocks();
CompileASMFunctions();
ResetLUTs();
if (g_settings.IsUsingFastmem())
CPU::UpdateMemoryPointers();
}
bool CPU::NewRec::InitializeFastmem()
{
const CPUFastmemMode mode = g_settings.cpu_fastmem_mode;
Assert(mode == CPUFastmemMode::MMap);
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
if (!Common::PageFaultHandler::InstallHandler(&g_fast_map, buffer.GetCodePointer(), buffer.GetTotalSize(),
&PageFaultHandler))
{
Log_ErrorPrintf("Failed to install page fault handler");
return false;
}
Bus::UpdateFastmemViews(mode);
CPU::UpdateMemoryPointers();
return true;
}
void CPU::NewRec::ShutdownFastmem()
{
Common::PageFaultHandler::RemoveHandler(&g_fast_map);
Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
CPU::UpdateMemoryPointers();
}
void CPU::NewRec::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
bool is_load)
{
DebugAssert(code_size < std::numeric_limits<u8>::max());
DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
auto iter = s_fastmem_backpatch_info.find(code_address);
if (iter != s_fastmem_backpatch_info.end())
s_fastmem_backpatch_info.erase(iter);
const LoadstoreBackpatchInfo info{
guest_pc, gpr_bitmask, static_cast<u16>(cycles), address_register, data_register, static_cast<u16>(size),
is_signed, is_load, static_cast<u8>(code_size), static_cast<u8>(0)};
s_fastmem_backpatch_info.emplace(code_address, info);
}
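// Overview of the page fault handler below: a fault inside the fastmem arena is
// translated back to a guest physical address; writes to RAM pages that still
// contain compiled code invalidate that page and resume a limited number of times,
// while everything else gets the faulting load/store rewritten, using the backpatch
// info recorded above, to call the slow memory path via a far-code thunk.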
Common::PageFaultHandler::HandlerResult CPU::NewRec::PageFaultHandler(void* exception_pc, void* fault_address,
bool is_write)
{
if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
{
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
const PhysicalMemoryAddress guest_address = static_cast<PhysicalMemoryAddress>(
static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));
Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
is_write ? "(write)" : "(read)", guest_address);
auto iter = s_fastmem_backpatch_info.find(exception_pc);
if (iter == s_fastmem_backpatch_info.end())
{
Log_ErrorPrintf("No backpatch info found for %p", exception_pc);
return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
}
// if we're writing to ram, let it go through a few times, and use manual block protection to sort it out
LoadstoreBackpatchInfo& info = iter->second;
if (is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(guest_address) && info.fault_count < 10)
{
Log_DevPrintf("Ignoring fault due to RAM write");
InvalidateBlocksWithPageNumber(Bus::GetRAMCodePageIndex(guest_address));
info.fault_count++;
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
Log_DevPrintf("Backpatching %s at %p[%u] (pc %08X addr %08X): Bitmask %08X Addr %u Data %u Size %u Signed %02X",
info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address,
info.gpr_bitmask, info.address_register, info.data_register, info.AccessSizeInBytes(), info.is_signed);
// remove the cycles we added for the memory read, then take them off again after the backpatch
// the normal rec path will add the ram read ticks later, so we need to take them off at the end
DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS);
const TickCount cycles_to_add =
static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
const u32 thunk_size =
BackpatchLoadStore(buffer.GetFreeFarCodePointer(), buffer.GetFreeFarCodeSpace(), exception_pc, info.code_size,
cycles_to_add, cycles_to_remove, info.gpr_bitmask, info.address_register, info.data_register,
info.AccessSize(), info.is_signed, info.is_load);
buffer.CommitFarCode(thunk_size);
// TODO: queue block for recompilation later
// and store the pc in the faulting list, so that we don't emit another fastmem loadstore
s_fastmem_faulting_pcs.insert(info.guest_pc);
s_fastmem_backpatch_info.erase(iter);
return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
// TODO: move this into the compiler
u32 CPU::NewRec::ReadBlockInstructions(u32 start_pc)
{
u32 pc = start_pc;
bool is_branch = false;
bool is_branch_delay = false;
// TODO: Jump to other block if it exists at this pc?
s_block_instructions.clear();
for (;;)
{
Instruction i;
if (!SafeReadInstruction(pc, &i.bits) || !IsInvalidInstruction(i))
break;
is_branch_delay = is_branch;
is_branch = IsBranchInstruction(i);
s_block_instructions.push_back(i);
pc += sizeof(Instruction);
if (is_branch_delay)
break;
if (IsExitBlockInstruction(i))
break;
}
return static_cast<u32>(s_block_instructions.size());
}
void CPU::NewRec::SetRegAccess(InstructionInfo* inst, Reg reg, bool write)
{
if (reg == Reg::zero)
return;
if (!write)
{
for (u32 i = 0; i < std::size(inst->read_reg); i++)
{
if (inst->read_reg[i] == Reg::zero)
{
inst->read_reg[i] = reg;
break;
}
}
}
else
{
#if 0
for (u32 i = 0; i < std::size(inst->write_reg); i++)
{
if (inst->write_reg[i] == Reg::zero)
{
inst->write_reg[i] = reg;
break;
}
}
#endif
}
}
#define BackpropSetReads(reg) \
do \
{ \
if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED; \
inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
SetRegAccess(inst, reg, false); \
} while (0)
#define BackpropSetWrites(reg) \
do \
{ \
prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED); \
if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
SetRegAccess(inst, reg, true); \
} while (0)
// TODO: memory loads should be delayed one instruction because of stupid load delays.
#define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)
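// Simplified, self-contained model (not the commit's exact flag semantics) of the
// backwards register-liveness pass that FillBlockRegInfo() below performs with the
// macros above: walk the block from the last instruction to the first and record,
// per instruction, whether a written guest register is still read later, so the
// register allocator can drop dead values early.
#include <array>
#include <cstdint>
#include <cstdio>
#include <vector>
struct Inst { uint8_t writes; std::vector<uint8_t> reads; };
int main()
{
  // r1 = r2 + r3; r4 = r1 + r1; r1 = r5  (r1's first value dies after inst 1)
  const std::vector<Inst> block = {{1, {2, 3}}, {4, {1, 1}}, {1, {5}}};
  std::array<bool, 8> live{}; // liveness of each register *after* the visited instruction
  for (size_t i = block.size(); i-- > 0;)
  {
    const Inst& in = block[i];
    std::printf("inst %zu: r%u is %s after this point\n", i, static_cast<unsigned>(in.writes),
                live[in.writes] ? "still live" : "dead");
    live[in.writes] = false; // a write kills the previous value
    for (uint8_t r : in.reads)
      live[r] = true;        // a read keeps the value live further up the block
  }
  return 0;
}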
void CPU::NewRec::FillBlockRegInfo(Block* block)
{
const Instruction* iinst = block->Instructions() + (block->size - 1);
InstructionInfo* const start = block->InstructionsInfo();
InstructionInfo* inst = start + (block->size - 1);
std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
// std::memset(inst->write_reg, 0, sizeof(inst->write_reg));
while (inst != start)
{
InstructionInfo* prev = inst - 1;
std::memcpy(prev, inst, sizeof(InstructionInfo));
const Reg rs = iinst->r.rs;
const Reg rt = iinst->r.rt;
switch (iinst->op)
{
case InstructionOp::funct:
{
const Reg rd = iinst->r.rd;
switch (iinst->r.funct)
{
case InstructionFunct::sll:
case InstructionFunct::srl:
case InstructionFunct::sra:
BackpropSetWrites(rd);
BackpropSetReads(rt);
break;
case InstructionFunct::sllv:
case InstructionFunct::srlv:
case InstructionFunct::srav:
case InstructionFunct::add:
case InstructionFunct::addu:
case InstructionFunct::sub:
case InstructionFunct::subu:
case InstructionFunct::and_:
case InstructionFunct::or_:
case InstructionFunct::xor_:
case InstructionFunct::nor:
case InstructionFunct::slt:
case InstructionFunct::sltu:
BackpropSetWrites(rd);
BackpropSetReads(rt);
BackpropSetReads(rs);
break;
case InstructionFunct::jr:
BackpropSetReads(rs);
break;
case InstructionFunct::jalr:
BackpropSetReads(rs);
BackpropSetWrites(rd);
break;
case InstructionFunct::mfhi:
BackpropSetWrites(rd);
BackpropSetReads(Reg::hi);
break;
case InstructionFunct::mflo:
BackpropSetWrites(rd);
BackpropSetReads(Reg::lo);
break;
case InstructionFunct::mthi:
BackpropSetWrites(Reg::hi);
BackpropSetReads(rs);
break;
case InstructionFunct::mtlo:
BackpropSetWrites(Reg::lo);
BackpropSetReads(rs);
break;
case InstructionFunct::mult:
case InstructionFunct::multu:
case InstructionFunct::div:
case InstructionFunct::divu:
BackpropSetWrites(Reg::hi);
BackpropSetWrites(Reg::lo);
BackpropSetReads(rs);
BackpropSetReads(rt);
break;
case InstructionFunct::syscall:
case InstructionFunct::break_:
break;
default:
Log_ErrorPrintf("Unknown funct %u", static_cast<u32>(iinst->r.funct.GetValue()));
break;
}
}
break;
case InstructionOp::b:
{
if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
BackpropSetWrites(Reg::ra);
BackpropSetReads(rs);
}
break;
case InstructionOp::j:
break;
case InstructionOp::jal:
BackpropSetWrites(Reg::ra);
break;
case InstructionOp::beq:
case InstructionOp::bne:
BackpropSetReads(rs);
BackpropSetReads(rt);
break;
case InstructionOp::blez:
case InstructionOp::bgtz:
BackpropSetReads(rs);
break;
case InstructionOp::addi:
case InstructionOp::addiu:
case InstructionOp::slti:
case InstructionOp::sltiu:
case InstructionOp::andi:
case InstructionOp::ori:
case InstructionOp::xori:
BackpropSetWrites(rt);
BackpropSetReads(rs);
break;
case InstructionOp::lui:
BackpropSetWrites(rt);
break;
case InstructionOp::lb:
case InstructionOp::lh:
case InstructionOp::lw:
case InstructionOp::lbu:
case InstructionOp::lhu:
BackpropSetWritesDelayed(rt);
BackpropSetReads(rs);
break;
case InstructionOp::lwl:
case InstructionOp::lwr:
BackpropSetWritesDelayed(rt);
BackpropSetReads(rs);
BackpropSetReads(rt);
break;
case InstructionOp::sb:
case InstructionOp::sh:
case InstructionOp::swl:
case InstructionOp::sw:
case InstructionOp::swr:
BackpropSetReads(rt);
BackpropSetReads(rs);
break;
case InstructionOp::cop0:
case InstructionOp::cop2:
{
if (iinst->cop.IsCommonInstruction())
{
switch (iinst->cop.CommonOp())
{
case CopCommonInstruction::mfcn:
case CopCommonInstruction::cfcn:
BackpropSetWritesDelayed(rt);
break;
case CopCommonInstruction::mtcn:
case CopCommonInstruction::ctcn:
BackpropSetReads(rt);
break;
}
}
break;
case InstructionOp::lwc2:
case InstructionOp::swc2:
BackpropSetReads(rs);
BackpropSetReads(rt);
break;
default:
Log_ErrorPrintf("Unknown op %u", static_cast<u32>(iinst->r.funct.GetValue()));
break;
}
} // end switch
inst--;
iinst--;
} // end while
}
}

19 src/core/cpu_newrec.h Normal file

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "types.h"
#ifdef ENABLE_NEWREC
namespace CPU::NewRec
{
bool Initialize();
void Reset();
void Shutdown();
[[noreturn]] void Execute();
void InvalidateAllRAMBlocks();
void InvalidateBlocksWithPageNumber(u32 index);
}
#endif

File diff suppressed because it is too large


@ -0,0 +1,458 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec.h"
#include "cpu_newrec_private.h"
#include "cpu_types.h"
#include <array>
#include <bitset>
#include <optional>
#include <utility>
#include <vector>
namespace CPU::NewRec {
// Global options
static constexpr bool EMULATE_LOAD_DELAYS = true;
static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;
// Arch-specific options
#if defined(CPU_ARCH_X64)
static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = true;
#elif defined(CPU_ARCH_ARM64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#elif defined(CPU_ARCH_RISCV64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#endif
class Compiler
{
public:
Compiler();
virtual ~Compiler();
const void* CompileBlock(Block* block);
protected:
enum FlushFlags : u32
{
FLUSH_FLUSH_MIPS_REGISTERS = (1 << 0),
FLUSH_INVALIDATE_MIPS_REGISTERS = (1 << 1),
FLUSH_FREE_CALLER_SAVED_REGISTERS = (1 << 2),
FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS = (1 << 3),
FLUSH_FREE_ALL_REGISTERS = (1 << 4),
FLUSH_PC = (1 << 5),
FLUSH_INSTRUCTION_BITS = (1 << 6),
FLUSH_CYCLES = (1 << 7),
FLUSH_LOAD_DELAY = (1 << 8),
FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9),
FLUSH_GTE_DONE_CYCLE = (1 << 10),
FLUSH_GTE_STALL_FROM_STATE = (1 << 11),
FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS),
FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE),
FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS),
FLUSH_FOR_EXCEPTION =
(FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next.
FLUSH_FOR_INTERPRETER =
(FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC |
FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE),
FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS |
FLUSH_GTE_STALL_FROM_STATE),
};
union CompileFlags
{
struct
{
u32 const_s : 1; // S is constant
u32 const_t : 1; // T is constant
u32 const_lo : 1; // LO is constant
u32 const_hi : 1; // HI is constant
u32 valid_host_d : 1; // D is valid in host register
u32 valid_host_s : 1; // S is valid in host register
u32 valid_host_t : 1; // T is valid in host register
u32 valid_host_lo : 1; // LO is valid in host register
u32 valid_host_hi : 1; // HI is valid in host register
u32 host_d : 5; // D host register
u32 host_s : 5; // S host register
u32 host_t : 5; // T host register
u32 host_lo : 5; // LO host register
u32 delay_slot_swapped : 1;
u32 pad1 : 2; // 28..31
u32 host_hi : 5; // HI host register
u32 mips_s : 5; // S guest register
u32 mips_t : 5; // T guest register
u32 pad2 : 15; // 32 bits
};
u64 bits;
ALWAYS_INLINE Reg MipsS() const { return static_cast<Reg>(mips_s); }
ALWAYS_INLINE Reg MipsT() const { return static_cast<Reg>(mips_t); }
};
static_assert(sizeof(CompileFlags) == sizeof(u64));
enum TemplateFlag : u32
{
TF_READS_S = (1 << 0),
TF_READS_T = (1 << 1),
TF_READS_LO = (1 << 2),
TF_READS_HI = (1 << 3),
TF_WRITES_D = (1 << 4),
TF_WRITES_T = (1 << 5),
TF_WRITES_LO = (1 << 6),
TF_WRITES_HI = (1 << 7),
TF_COMMUTATIVE = (1 << 8), // S op T == T op S
TF_CAN_OVERFLOW = (1 << 9),
// TF_NORENAME = // TODO
TF_LOAD_DELAY = (1 << 10),
TF_GTE_STALL = (1 << 11),
TF_NO_NOP = (1 << 12),
TF_NEEDS_REG_S = (1 << 13),
TF_NEEDS_REG_T = (1 << 14),
TF_CAN_SWAP_DELAY_SLOT = (1 << 15),
TF_RENAME_WITH_ZERO_T = (1 << 16), // add commutative for S as well
TF_RENAME_WITH_ZERO_IMM = (1 << 17),
TF_PGXP_WITHOUT_CPU = (1 << 18),
};
enum HostRegFlags : u8
{
HR_ALLOCATED = (1 << 0),
HR_NEEDED = (1 << 1),
HR_MODE_READ = (1 << 2), // valid
HR_MODE_WRITE = (1 << 3), // dirty
HR_USABLE = (1 << 7),
HR_CALLEE_SAVED = (1 << 6),
ALLOWED_HR_FLAGS = HR_MODE_READ | HR_MODE_WRITE,
IMMUTABLE_HR_FLAGS = HR_USABLE | HR_CALLEE_SAVED,
};
enum HostRegAllocType : u8
{
HR_TYPE_TEMP,
HR_TYPE_CPU_REG,
HR_TYPE_PC_WRITEBACK,
HR_TYPE_LOAD_DELAY_VALUE,
HR_TYPE_NEXT_LOAD_DELAY_VALUE,
};
struct HostRegAlloc
{
u8 flags;
HostRegAllocType type;
Reg reg;
u16 counter;
};
enum class BranchCondition : u8
{
Equal,
NotEqual,
GreaterThanZero,
GreaterEqualZero,
LessThanZero,
LessEqualZero,
};
ALWAYS_INLINE bool HasConstantReg(Reg r) const { return m_constant_regs_valid.test(static_cast<u32>(r)); }
ALWAYS_INLINE bool HasDirtyConstantReg(Reg r) const { return m_constant_regs_dirty.test(static_cast<u32>(r)); }
ALWAYS_INLINE bool HasConstantRegValue(Reg r, u32 val) const
{
return m_constant_regs_valid.test(static_cast<u32>(r)) && m_constant_reg_values[static_cast<u32>(r)] == val;
}
ALWAYS_INLINE u32 GetConstantRegU32(Reg r) const { return m_constant_reg_values[static_cast<u32>(r)]; }
ALWAYS_INLINE s32 GetConstantRegS32(Reg r) const
{
return static_cast<s32>(m_constant_reg_values[static_cast<u32>(r)]);
}
void SetConstantReg(Reg r, u32 v);
void ClearConstantReg(Reg r);
void FlushConstantReg(Reg r);
void FlushConstantRegs(bool invalidate);
Reg MipsD() const;
u32 GetConditionalBranchTarget(CompileFlags cf) const;
u32 GetBranchReturnAddress(CompileFlags cf) const;
bool TrySwapDelaySlot(Reg rs = Reg::zero, Reg rt = Reg::zero, Reg rd = Reg::zero);
void SetCompilerPC(u32 newpc);
virtual void DisassembleAndLog(const void* start, u32 size) = 0;
virtual u32 GetHostInstructionCount(const void* start, u32 size) = 0;
virtual const void* GetCurrentCodePointer() = 0;
virtual void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space);
virtual void BeginBlock();
virtual void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) = 0;
virtual void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) = 0;
virtual void EndBlock(const std::optional<u32>& newpc) = 0;
virtual void EndBlockWithException(Exception excode) = 0;
virtual const void* EndCompile(u32* code_size, u32* far_code_size) = 0;
ALWAYS_INLINE bool IsHostRegAllocated(u32 r) const { return (m_host_regs[r].flags & HR_ALLOCATED) != 0; }
static const char* GetReadWriteModeString(u32 flags);
virtual const char* GetHostRegName(u32 reg) const = 0;
u32 GetFreeHostReg(u32 flags);
u32 AllocateHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
std::optional<u32> CheckHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
u32 AllocateTempHostReg(u32 flags = 0);
void SwapHostRegAlloc(u32 lhs, u32 rhs);
void FlushHostReg(u32 reg);
void FreeHostReg(u32 reg);
void ClearHostReg(u32 reg);
void MarkRegsNeeded(HostRegAllocType type, Reg reg);
void RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg);
void ClearHostRegNeeded(u32 reg);
void ClearHostRegsNeeded();
void DeleteMIPSReg(Reg reg, bool flush);
bool TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other);
void UpdateHostRegCounters();
virtual void LoadHostRegWithConstant(u32 reg, u32 val) = 0;
virtual void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) = 0;
virtual void StoreConstantToCPUPointer(u32 val, const void* ptr) = 0;
virtual void StoreHostRegToCPUPointer(u32 reg, const void* ptr) = 0;
virtual void CopyHostReg(u32 dst, u32 src) = 0;
virtual void Flush(u32 flags);
/// Returns true if there is a load delay which will be stored at the end of the instruction.
bool HasLoadDelay() const { return m_load_delay_register != Reg::count; }
/// Cancels any pending load delay to the specified register.
void CancelLoadDelaysToReg(Reg reg);
/// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
void UpdateLoadDelay();
/// Flushes the load delay, i.e. writes it to the destination register.
void FinishLoadDelay();
/// Flushes the load delay, but only if it matches the specified register.
void FinishLoadDelayToReg(Reg reg);
/// Uses a caller-saved register for load delays when PGXP is enabled.
u32 GetFlagsForNewLoadDelayedReg() const;
void BackupHostState();
void RestoreHostState();
/// Registers loadstore for possible backpatching.
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register,
MemoryAccessSize size, bool is_signed, bool is_load);
void CompileInstruction();
void CompileBranchDelaySlot(bool dirty_pc = true);
void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags),
const void* pgxp_cpu_func, u32 tflags);
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
const std::optional<VirtualMemoryAddress>&),
MemoryAccessSize size, bool store, bool sign, u32 tflags);
void CompileMoveRegTemplate(Reg dst, Reg src);
virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) = 0;
virtual void Compile_Fallback() = 0;
void Compile_j();
virtual void Compile_jr(CompileFlags cf) = 0;
void Compile_jr_const(CompileFlags cf);
void Compile_jal();
virtual void Compile_jalr(CompileFlags cf) = 0;
void Compile_jalr_const(CompileFlags cf);
void Compile_syscall();
void Compile_break();
void Compile_b_const(CompileFlags cf);
void Compile_b(CompileFlags cf);
void Compile_blez(CompileFlags cf);
void Compile_blez_const(CompileFlags cf);
void Compile_bgtz(CompileFlags cf);
void Compile_bgtz_const(CompileFlags cf);
void Compile_beq(CompileFlags cf);
void Compile_beq_const(CompileFlags cf);
void Compile_bne(CompileFlags cf);
void Compile_bne_const(CompileFlags cf);
virtual void Compile_bxx(CompileFlags cf, BranchCondition cond) = 0;
void Compile_bxx_const(CompileFlags cf, BranchCondition cond);
void Compile_sll_const(CompileFlags cf);
virtual void Compile_sll(CompileFlags cf) = 0;
void Compile_srl_const(CompileFlags cf);
virtual void Compile_srl(CompileFlags cf) = 0;
void Compile_sra_const(CompileFlags cf);
virtual void Compile_sra(CompileFlags cf) = 0;
void Compile_sllv_const(CompileFlags cf);
virtual void Compile_sllv(CompileFlags cf) = 0;
void Compile_srlv_const(CompileFlags cf);
virtual void Compile_srlv(CompileFlags cf) = 0;
void Compile_srav_const(CompileFlags cf);
virtual void Compile_srav(CompileFlags cf) = 0;
void Compile_mult_const(CompileFlags cf);
virtual void Compile_mult(CompileFlags cf) = 0;
void Compile_multu_const(CompileFlags cf);
virtual void Compile_multu(CompileFlags cf) = 0;
void Compile_div_const(CompileFlags cf);
virtual void Compile_div(CompileFlags cf) = 0;
void Compile_divu_const(CompileFlags cf);
virtual void Compile_divu(CompileFlags cf) = 0;
void Compile_add_const(CompileFlags cf);
virtual void Compile_add(CompileFlags cf) = 0;
void Compile_addu_const(CompileFlags cf);
virtual void Compile_addu(CompileFlags cf) = 0;
void Compile_sub_const(CompileFlags cf);
virtual void Compile_sub(CompileFlags cf) = 0;
void Compile_subu_const(CompileFlags cf);
virtual void Compile_subu(CompileFlags cf) = 0;
void Compile_and_const(CompileFlags cf);
virtual void Compile_and(CompileFlags cf) = 0;
void Compile_or_const(CompileFlags cf);
virtual void Compile_or(CompileFlags cf) = 0;
void Compile_xor_const(CompileFlags cf);
virtual void Compile_xor(CompileFlags cf) = 0;
void Compile_nor_const(CompileFlags cf);
virtual void Compile_nor(CompileFlags cf) = 0;
void Compile_slt_const(CompileFlags cf);
virtual void Compile_slt(CompileFlags cf) = 0;
void Compile_sltu_const(CompileFlags cf);
virtual void Compile_sltu(CompileFlags cf) = 0;
void Compile_addi_const(CompileFlags cf);
virtual void Compile_addi(CompileFlags cf) = 0;
void Compile_addiu_const(CompileFlags cf);
virtual void Compile_addiu(CompileFlags cf) = 0;
void Compile_slti_const(CompileFlags cf);
virtual void Compile_slti(CompileFlags cf) = 0;
void Compile_sltiu_const(CompileFlags cf);
virtual void Compile_sltiu(CompileFlags cf) = 0;
void Compile_andi_const(CompileFlags cf);
virtual void Compile_andi(CompileFlags cf) = 0;
void Compile_ori_const(CompileFlags cf);
virtual void Compile_ori(CompileFlags cf) = 0;
void Compile_xori_const(CompileFlags cf);
virtual void Compile_xori(CompileFlags cf) = 0;
void Compile_lui();
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
static u32* GetCop0RegPtr(Cop0Reg reg);
static u32 GetCop0RegWriteMask(Cop0Reg reg);
void Compile_mfc0(CompileFlags cf);
virtual void Compile_mtc0(CompileFlags cf) = 0;
virtual void Compile_rfe(CompileFlags cf) = 0;
void AddGTETicks(TickCount ticks);
void StallUntilGTEComplete();
virtual void Compile_mfc2(CompileFlags cf) = 0;
virtual void Compile_mtc2(CompileFlags cf) = 0;
virtual void Compile_cop2(CompileFlags cf) = 0;
enum GTERegisterAccessAction : u8
{
Ignore,
Direct,
ZeroExtend16,
SignExtend16,
CallHandler,
PushFIFO,
};
static std::pair<u32*, GTERegisterAccessAction> GetGTERegisterPointer(u32 index, bool writing);
Block* m_block = nullptr;
u32 m_compiler_pc = 0;
TickCount m_cycles = 0;
TickCount m_gte_done_cycle = 0;
const Instruction* inst = nullptr;
const InstructionInfo* iinfo = nullptr;
u32 m_current_instruction_pc = 0;
bool m_current_instruction_branch_delay_slot = false;
bool m_branch_delay_slot_swapped = false;
bool m_dirty_pc = false;
bool m_dirty_instruction_bits = false;
bool m_dirty_gte_done_cycle = false;
bool m_block_ended = false;
std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_valid = {};
std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_dirty = {};
std::array<u32, static_cast<size_t>(Reg::count)> m_constant_reg_values = {};
std::array<HostRegAlloc, NUM_HOST_REGS> m_host_regs = {};
u16 m_register_alloc_counter = 0;
bool m_load_delay_dirty = true;
Reg m_load_delay_register = Reg::count;
u32 m_load_delay_value_register = 0;
Reg m_next_load_delay_register = Reg::count;
u32 m_next_load_delay_value_register = 0;
struct HostStateBackup
{
TickCount cycles;
TickCount gte_done_cycle;
u32 compiler_pc;
bool dirty_pc;
bool dirty_instruction_bits;
bool dirty_gte_done_cycle;
bool block_ended;
const Instruction* inst;
const InstructionInfo* iinfo;
u32 current_instruction_pc;
bool current_instruction_delay_slot;
std::bitset<static_cast<size_t>(Reg::count)> const_regs_valid;
std::bitset<static_cast<size_t>(Reg::count)> const_regs_dirty;
std::array<u32, static_cast<size_t>(Reg::count)> const_regs_values;
std::array<HostRegAlloc, NUM_HOST_REGS> host_regs;
u16 register_alloc_counter;
bool load_delay_dirty;
Reg load_delay_register;
u32 load_delay_value_register;
Reg next_load_delay_register;
u32 next_load_delay_value_register;
};
// we need two of these, one for branch delays, and another if we have an overflow in the delay slot
std::array<HostStateBackup, 2> m_host_state_backup = {};
u32 m_host_state_backup_count = 0;
// PGXP memory callbacks
static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions;
static const std::array<const void*, 3> s_pgxp_mem_store_functions;
};
extern Compiler* g_compiler;
} // namespace CPU::NewRec

File diff suppressed because it is too large


@ -0,0 +1,166 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <memory>
#include "vixl/aarch64/assembler-aarch64.h"
namespace CPU::NewRec {
class AArch64Compiler final : public Compiler
{
public:
AArch64Compiler();
~AArch64Compiler() override;
protected:
void DisassembleAndLog(const void* start, u32 size) override;
u32 GetHostInstructionCount(const void* start, u32 size) override;
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc);
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
void Compile_Fallback() override;
void CheckBranchTarget(const vixl::aarch64::WRegister& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
void Compile_addi(CompileFlags cf, bool overflow);
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
void Compile_shift(CompileFlags cf, void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&, unsigned));
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf,
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&,
const vixl::aarch64::Register&),
void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&, unsigned));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
void TestOverflow(const vixl::aarch64::WRegister& result);
void Compile_dst_op(CompileFlags cf,
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&,
const vixl::aarch64::Operand&),
bool commutative, bool logical, bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
vixl::aarch64::WRegister
ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
template<typename RegAllocFn>
vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const vixl::aarch64::WRegister& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
void EmitMov(const vixl::aarch64::WRegister& dst, u32 val);
void EmitCall(const void* ptr, bool force_inline = false);
vixl::aarch64::Operand armCheckAddSubConstant(s32 val);
vixl::aarch64::Operand armCheckAddSubConstant(u32 val);
vixl::aarch64::Operand armCheckCompareConstant(s32 val);
vixl::aarch64::Operand armCheckLogicalConstant(u32 val);
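  // (Sketch of the intent, not part of this commit: the armCheck* helpers above
  // are expected to return `val` directly as an Operand when it is encodable as
  // an AArch64 add/sub, compare or logical immediate, and otherwise to move it
  // into a scratch register and return that register as the Operand.)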
void SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
void SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit);
void SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero);
void SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
void AssertRegOrConstS(CompileFlags cf) const;
void AssertRegOrConstT(CompileFlags cf) const;
vixl::aarch64::MemOperand MipsPtr(Reg r) const;
vixl::aarch64::WRegister CFGetRegD(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegS(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegT(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegLO(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegHI(CompileFlags cf) const;
void MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
void MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
void MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg);
std::unique_ptr<vixl::aarch64::Assembler> m_emitter;
std::unique_ptr<vixl::aarch64::Assembler> m_far_emitter;
vixl::aarch64::Assembler* armAsm;
#ifdef VIXL_DEBUG
std::unique_ptr<vixl::CodeBufferCheckScope> m_emitter_check;
std::unique_ptr<vixl::CodeBufferCheckScope> m_far_emitter_check;
#endif
};
} // namespace CPU::NewRec

File diff suppressed because it is too large

View File

@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <memory>
#include "biscuit/assembler.hpp"
namespace CPU::NewRec {
class RISCV64Compiler final : public Compiler
{
public:
RISCV64Compiler();
~RISCV64Compiler() override;
protected:
void DisassembleAndLog(const void* start, u32 size) override;
u32 GetHostInstructionCount(const void* start, u32 size) override;
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc);
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
void Compile_Fallback() override;
void CheckBranchTarget(const biscuit::GPR& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
void Compile_addi(CompileFlags cf, bool overflow);
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
void Compile_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard);
void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative,
bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const biscuit::GPR>& reg = std::nullopt);
template<typename RegAllocFn>
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const biscuit::GPR& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
private:
void EmitMov(const biscuit::GPR& dst, u32 val);
void EmitCall(const void* ptr);
void SwitchToFarCode(bool emit_jump,
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) = nullptr,
const biscuit::GPR& rs1 = biscuit::zero, const biscuit::GPR& rs2 = biscuit::zero);
void SwitchToNearCode(bool emit_jump);
void AssertRegOrConstS(CompileFlags cf) const;
void AssertRegOrConstT(CompileFlags cf) const;
// vixl::aarch64::MemOperand MipsPtr(Reg r) const;
void SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
void (biscuit::Assembler::*iop)(biscuit::GPR, biscuit::GPR, u32),
void (biscuit::Assembler::*rop)(biscuit::GPR, biscuit::GPR, biscuit::GPR));
void SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
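  // (Sketch of the intent, not part of this commit: these Safe* helpers are
  // expected to use the immediate `iop` form when imm sign-extends from the
  // 12-bit RISC-V I-type field, and otherwise to materialize imm into a
  // temporary register and fall back to the register-register `rop` form.)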
void EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
biscuit::GPR CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg);
biscuit::GPR CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg);
biscuit::GPR CFGetRegD(CompileFlags cf) const;
biscuit::GPR CFGetRegS(CompileFlags cf) const;
biscuit::GPR CFGetRegT(CompileFlags cf) const;
biscuit::GPR CFGetRegLO(CompileFlags cf) const;
biscuit::GPR CFGetRegHI(CompileFlags cf) const;
void MoveSToReg(const biscuit::GPR& dst, CompileFlags cf);
void MoveTToReg(const biscuit::GPR& dst, CompileFlags cf);
std::unique_ptr<biscuit::Assembler> m_emitter;
std::unique_ptr<biscuit::Assembler> m_far_emitter;
biscuit::Assembler* rvAsm;
};
} // namespace CPU::NewRec

File diff suppressed because it is too large

View File

@ -0,0 +1,149 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <initializer_list>
#include <memory>
// We need to include windows.h before xbyak does.
#ifdef _WIN32
#include "common/windows_headers.h"
#endif
#define XBYAK_NO_OP_NAMES 1
#include "xbyak.h"
namespace CPU::NewRec {
class X64Compiler final : public Compiler
{
public:
X64Compiler();
~X64Compiler() override;
protected:
void DisassembleAndLog(const void* start, u32 size) override;
u32 GetHostInstructionCount(const void* start, u32 size) override;
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc);
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
void Compile_Fallback() override;
void CheckBranchTarget(const Xbyak::Reg32& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf,
void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Reg8&),
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
void TestOverflow(const Xbyak::Reg32& result);
void Compile_dst_op(CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&),
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), bool commutative,
bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const Xbyak::Reg32>& reg = std::nullopt);
template<typename RegAllocFn>
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const Xbyak::Reg32& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
void SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
void SwitchToNearCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
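  // (Sketch of the intent, not part of this commit: the near emitter holds the
  // hot path while m_far_emitter collects cold paths such as exception and
  // slow-memory handlers; SwitchToFarCode() optionally emits the jump that
  // transfers control there, and SwitchToNearCode() returns to the hot path.)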
Xbyak::Address MipsPtr(Reg r) const;
Xbyak::Reg32 CFGetRegD(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegS(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegT(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegLO(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegHI(CompileFlags cf) const;
Xbyak::Reg32 MoveSToD(CompileFlags cf);
Xbyak::Reg32 MoveSToT(CompileFlags cf);
Xbyak::Reg32 MoveTToD(CompileFlags cf);
void MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf);
void MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf);
void MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg);
std::unique_ptr<Xbyak::CodeGenerator> m_emitter;
std::unique_ptr<Xbyak::CodeGenerator> m_far_emitter;
Xbyak::CodeGenerator* cg;
};
} // namespace CPU::NewRec

View File

@ -0,0 +1,164 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "bus.h"
#include "cpu_core_private.h"
#include "cpu_newrec.h"
#include "cpu_types.h"
#include "types.h"
#include "util/jit_code_buffer.h"
#include <unordered_map>
namespace CPU::NewRec {
enum : u32
{
LUT_TABLE_COUNT = 0x10000,
LUT_TABLE_SIZE = 0x10000 / sizeof(u32), // 16384, one for each PC
LUT_TABLE_SHIFT = 16,
MAX_BLOCK_EXIT_LINKS = 2,
};
using CodeLUT = const void**;
using CodeLUTArray = std::array<CodeLUT, LUT_TABLE_COUNT>;
using BlockLinkMap = std::unordered_multimap<u32, void*>; // TODO: try ordered?
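// Illustrative sketch (not part of this commit): with the constants above, a
// lookup would plausibly split a guest PC into a 64KB-page table index and a
// word index within that page. The helper name and the explicit LUT parameter
// are assumptions made purely for illustration.
inline const void* LookupCodeSketch(const CodeLUTArray& lut, u32 pc)
{
  const u32 table = pc >> LUT_TABLE_SHIFT; // which 64KB page the PC falls in
  const u32 idx = (pc & 0xFFFFu) >> 2;     // which word-aligned PC within that page
  return lut[table][idx];                  // compiled code, or a compile/dispatch stub
}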
enum RegInfoFlags : u8
{
RI_LIVE = (1 << 0),
RI_USED = (1 << 1),
RI_LASTUSE = (1 << 2),
};
struct InstructionInfo
{
u8 reg_flags[static_cast<u8>(Reg::count)];
// Reg write_reg[3];
Reg read_reg[3];
// If unset, values which are not live will not be written back to memory.
// Tends to break stuff at the moment.
static constexpr bool WRITE_DEAD_VALUES = true;
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
/// In other words, the register is worth keeping in a host register/caching it.
inline bool UsedTest(Reg reg) const { return (reg_flags[static_cast<u8>(reg)] & (RI_USED | RI_LASTUSE)) == RI_USED; }
/// Returns true if the value should be computed/written back.
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
inline bool LiveTest(Reg reg) const
{
return WRITE_DEAD_VALUES || ((reg_flags[static_cast<u8>(reg)] & RI_LIVE) != 0);
}
/// Returns true if the register can be renamed into another.
inline bool RenameTest(Reg reg) const { return (reg == Reg::zero || !UsedTest(reg) || !LiveTest(reg)); }
/// Returns true if this instruction reads this register.
inline bool ReadsReg(Reg reg) const { return (read_reg[0] == reg || read_reg[1] == reg || read_reg[2] == reg); }
};
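// Worked example (illustrative only, not part of this commit): suppose `info`
// describes "addu v0, t0, a0" and t0 is read again by a later instruction in
// the block. The analysis would then leave RI_USED set (without RI_LASTUSE) on
// t0 for this instruction, so info.UsedTest(Reg::t0) is true (t0 is worth
// keeping cached in a host register) and info.RenameTest(Reg::t0) is false.
// With WRITE_DEAD_VALUES == true, LiveTest() always returns true, so every
// written register is flushed back to the CPU state.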
enum class BlockState : u8
{
Valid,
Invalidated,
NeedsRecompile,
};
struct Block
{
u32 pc;
u32 size; // in guest instructions
const void* host_code;
// links to previous/next block within page
Block* next_block_in_page;
BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS];
u32 num_exit_links;
BlockState state;
// followed by Instruction * size, InstructionInfo * size
const Instruction* Instructions() const { return reinterpret_cast<const Instruction*>(this + 1); }
Instruction* Instructions() { return reinterpret_cast<Instruction*>(this + 1); }
const InstructionInfo* InstructionsInfo() const
{
return reinterpret_cast<const InstructionInfo*>(Instructions() + size);
}
InstructionInfo* InstructionsInfo() { return reinterpret_cast<InstructionInfo*>(Instructions() + size); }
};
using BlockLUTArray = std::array<Block**, LUT_TABLE_COUNT>;
struct LoadstoreBackpatchInfo
{
u32 guest_pc;
u32 gpr_bitmask;
u16 cycles;
u16 address_register : 5;
u16 data_register : 5;
u16 size : 2;
u16 is_signed : 1;
u16 is_load : 1;
u8 code_size;
u8 fault_count;
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
u32 AccessSizeInBytes() const { return 1u << size; }
};
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
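// Example (illustrative): a sign-extended 16-bit load is recorded with size == 1,
// is_signed == 1 and is_load == 1, so AccessSizeInBytes() yields 1u << 1 == 2;
// size == 2 likewise encodes a 4-byte access.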
static inline bool BlockInRAM(VirtualMemoryAddress pc)
{
return VirtualAddressToPhysical(pc) < Bus::g_ram_size;
}
enum class PageProtectionMode : u8
{
WriteProtected,
ManualCheck,
Unprotected,
};
struct PageProtectionInfo
{
Block* first_block_in_page;
Block* last_block_in_page;
PageProtectionMode mode;
u16 invalidate_count;
u32 invalidate_frame;
};
static_assert(sizeof(PageProtectionInfo) == 24);
Block* LookupBlock(u32 pc);
Block* CreateBlock(u32 pc);
bool RevalidateBlock(Block* block);
void CompileOrRevalidateBlock(u32 start_pc);
void DiscardAndRecompileBlock(u32 start_pc);
u32 CreateBlockLink(Block* from_block, void* code, u32 newpc);
PageProtectionMode GetProtectionModeForBlock(Block* block);
u32 CompileASMFunctions(u8* code, u32 code_size);
u32 EmitJump(void* code, const void* dst, bool flush_icache);
void SetFastMap(u32 pc, const void* function);
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
u32 BackpatchLoadStore(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, TickCount cycles_to_add,
TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register,
MemoryAccessSize size, bool is_signed, bool is_load);
extern CodeLUTArray g_fast_map;
extern NORETURN_FUNCTION_POINTER void(*g_enter_recompiler)();
extern const void* g_compile_or_revalidate_block;
extern const void* g_check_events_and_dispatch;
extern const void* g_dispatcher;
extern const void* g_interpret_block;
extern const void* g_discard_and_recompile_block;
} // namespace CPU::NewRec

View File

@ -979,8 +979,10 @@ void CodeGenerator::BlockPrologue()
EmitFunctionCall(nullptr, &Thunks::LogPC, Value::FromConstantU32(m_pc));
#endif
#if 0
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
EmitICacheCheckAndUpdate();
#endif
// we don't know the state of the last block, so assume load delays might be in progress
// TODO: Pull load delay into register cache
@ -1115,7 +1117,9 @@ void CodeGenerator::AddPendingCycles(bool commit)
if (commit)
{
m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
// m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
m_gte_done_cycle = 0;
m_gte_busy_cycles_dirty = true;
m_delayed_cycles_add = 0;
}
}

View File

@ -32,6 +32,8 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
const SpeculativeValue& address_spec, RegSize size)
{
#if 0
// TODO: re-enable once implemented in new-rec
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
{
TickCount read_ticks;
@ -59,6 +61,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
return result;
}
}
#endif
Value result = m_register_cache.AllocateScratch(HostPointerSize);
@ -118,6 +121,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
const SpeculativeValue& address_spec, RegSize size, const Value& value)
{
#if 0
// TODO: re-enable once implemented in new-rec
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
{
void* ptr = GetDirectWriteMemoryPointer(
@ -134,6 +139,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
return;
}
}
#endif
const bool use_fastmem =
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) && !SpeculativeIsCacheIsolated();

View File

@ -2709,6 +2709,8 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
void CodeGenerator::EmitICacheCheckAndUpdate()
{
// TODO: reenable me
return;
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
{
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],

View File

@ -369,6 +369,11 @@ void ImGuiManager::DrawPerformanceOverlay()
text.append_fmt("{}{}", first ? "" : "/", "CI");
first = false;
}
else if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
{
text.append_fmt("{}{}", first ? "" : "/", "NR");
first = false;
}
else
{
if (g_settings.cpu_recompiler_icache)

View File

@ -662,17 +662,6 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages)
g_settings.rewind_enable = false;
}
if (g_settings.IsRunaheadEnabled())
{
// Block linking is good for performance, but hurts when regularly loading (i.e. runahead), since everything has to
// be unlinked. Which would be thousands of blocks.
if (g_settings.cpu_recompiler_block_linking)
{
Log_WarningPrintf("Disabling block linking due to runahead.");
g_settings.cpu_recompiler_block_linking = false;
}
}
// if challenge mode is enabled, disable things like rewind since they use save states
if (Achievements::IsHardcoreModeActive())
{
@ -834,11 +823,13 @@ const char* Settings::GetDiscRegionDisplayName(DiscRegion region)
return Host::TranslateToCString("DiscRegion", s_disc_region_display_names[static_cast<int>(region)]);
}
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler"};
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler",
"NewRec"};
static constexpr const std::array s_cpu_execution_mode_display_names = {
TRANSLATE_NOOP("CPUExecutionMode", "Interpreter (Slowest)"),
TRANSLATE_NOOP("CPUExecutionMode", "Cached Interpreter (Faster)"),
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)")};
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)"),
TRANSLATE_NOOP("CPUExecutionMode", "New Recompiler (Experimental)")};
std::optional<CPUExecutionMode> Settings::ParseCPUExecutionMode(const char* str)
{

View File

@ -255,7 +255,12 @@ struct Settings
bool log_to_file = false;
ALWAYS_INLINE bool IsUsingCodeCache() const { return (cpu_execution_mode != CPUExecutionMode::Interpreter); }
ALWAYS_INLINE bool IsUsingAnyRecompiler() const
{
return (cpu_execution_mode == CPUExecutionMode::Recompiler || cpu_execution_mode == CPUExecutionMode::NewRec);
}
ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); }
ALWAYS_INLINE bool IsUsingNewRec() const { return (cpu_execution_mode == CPUExecutionMode::NewRec); }
ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); }
ALWAYS_INLINE bool IsRunaheadEnabled() const { return (runahead_frames > 0); }
@ -277,8 +282,7 @@ struct Settings
ALWAYS_INLINE bool IsUsingFastmem() const
{
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler &&
!cpu_recompiler_memory_exceptions);
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && IsUsingAnyRecompiler() && !cpu_recompiler_memory_exceptions);
}
ALWAYS_INLINE s32 GetAudioOutputVolume(bool fast_forwarding) const
@ -422,7 +426,7 @@ struct Settings
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f;
#ifdef ENABLE_RECOMPILER
#if defined(ENABLE_RECOMPILER)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
// LUT still ends up faster on Apple Silicon for now, because of 16K pages.
@ -431,6 +435,9 @@ struct Settings
#else
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT;
#endif
#elif defined(ENABLE_NEWREC)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled;

View File

@ -3516,7 +3516,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
CPU::ClearICache();
}
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
if (g_settings.IsUsingAnyRecompiler() &&
(g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions ||
g_settings.cpu_recompiler_block_linking != old_settings.cpu_recompiler_block_linking ||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache ||

View File

@ -46,6 +46,7 @@ enum class CPUExecutionMode : u8
Interpreter,
CachedInterpreter,
Recompiler,
NewRec,
Count
};