New Recompiler
This commit is contained in:
parent
5166a946da
commit
d820c971c6
|
@ -23,6 +23,7 @@ endif()
|
|||
# Renderer options.
|
||||
option(ENABLE_OPENGL "Build with OpenGL renderer" ON)
|
||||
option(ENABLE_VULKAN "Build with Vulkan renderer" ON)
|
||||
option(ENABLE_NEWREC "Build with experimental new dynarec (needed for RISC-V)" ON)
|
||||
|
||||
# Global options.
|
||||
if(NOT ANDROID)
|
||||
|
|
|
@ -120,6 +120,14 @@ set(RECOMPILER_SRCS
|
|||
cpu_recompiler_types.h
|
||||
)
|
||||
|
||||
set(NEWREC_SOURCES
|
||||
cpu_newrec.cpp
|
||||
cpu_newrec.h
|
||||
cpu_newrec_compiler.cpp
|
||||
cpu_newrec_compiler.h
|
||||
cpu_newrec_private.h
|
||||
)
|
||||
|
||||
target_precompile_headers(core PRIVATE "pch.h")
|
||||
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
|
||||
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
|
||||
|
@ -130,9 +138,16 @@ if(${CPU_ARCH} STREQUAL "x64")
|
|||
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak")
|
||||
target_compile_definitions(core PUBLIC "XBYAK_NO_EXCEPTION=1" "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
|
||||
target_sources(core PRIVATE ${RECOMPILER_SRCS}
|
||||
cpu_recompiler_code_generator_x64.cpp
|
||||
)
|
||||
message("Building x64 recompiler")
|
||||
|
||||
if(ENABLE_NEWREC)
|
||||
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
|
||||
target_sources(core PRIVATE ${NEWREC_SOURCES}
|
||||
cpu_newrec_compiler_x64.cpp
|
||||
cpu_newrec_compiler_x64.h
|
||||
)
|
||||
message("Building x64 newrec")
|
||||
endif()
|
||||
elseif(${CPU_ARCH} STREQUAL "aarch32")
|
||||
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1")
|
||||
target_sources(core PRIVATE ${RECOMPILER_SRCS}
|
||||
|
@ -147,6 +162,25 @@ elseif(${CPU_ARCH} STREQUAL "aarch64")
|
|||
)
|
||||
target_link_libraries(core PUBLIC vixl)
|
||||
message("Building AArch64 recompiler")
|
||||
if(ENABLE_NEWREC)
|
||||
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
|
||||
target_sources(core PRIVATE ${NEWREC_SOURCES}
|
||||
cpu_newrec_compiler_aarch64.cpp
|
||||
cpu_newrec_compiler_aarch64.h
|
||||
)
|
||||
message("Building AArch64 newrec")
|
||||
endif()
|
||||
elseif(${CPU_ARCH} STREQUAL "riscv64")
|
||||
target_compile_definitions(core PUBLIC "WITH_MMAP_FASTMEM=1")
|
||||
if(ENABLE_NEWREC)
|
||||
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
|
||||
target_sources(core PRIVATE ${NEWREC_SOURCES}
|
||||
cpu_newrec_compiler_riscv64.cpp
|
||||
cpu_newrec_compiler_riscv64.h
|
||||
)
|
||||
target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas)
|
||||
message("Building RISC-V 64-bit newrec")
|
||||
endif()
|
||||
else()
|
||||
message("Not building recompiler")
|
||||
endif()
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
<PreprocessorDefinitions Condition="('$(Platform)'!='ARM64')">ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_NEWREC=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
|
||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories Condition="'$(Platform)'!='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\rainterface</AdditionalIncludeDirectories>
|
||||
|
|
|
@ -13,6 +13,14 @@
|
|||
<ClCompile Include="cpu_core.cpp" />
|
||||
<ClCompile Include="cpu_disasm.cpp" />
|
||||
<ClCompile Include="cpu_code_cache.cpp" />
|
||||
<ClCompile Include="cpu_newrec.cpp" />
|
||||
<ClCompile Include="cpu_newrec_compiler.cpp" />
|
||||
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="cpu_newrec_compiler_x64.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="cpu_recompiler_code_generator.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
|
@ -89,6 +97,15 @@
|
|||
<ClInclude Include="cpu_core_private.h" />
|
||||
<ClInclude Include="cpu_disasm.h" />
|
||||
<ClInclude Include="cpu_code_cache.h" />
|
||||
<ClInclude Include="cpu_newrec.h" />
|
||||
<ClInclude Include="cpu_newrec_compiler.h" />
|
||||
<ClInclude Include="cpu_newrec_compiler_aarch64.h">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="cpu_newrec_compiler_x64.h">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="cpu_newrec_private.h" />
|
||||
<ClInclude Include="cpu_recompiler_code_generator.h">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
|
@ -176,6 +193,9 @@
|
|||
<ProjectReference Include="..\..\dep\zstd\zstd.vcxproj">
|
||||
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\..\dep\zydis\zydis.vcxproj">
|
||||
<Project>{c51a346a-86b2-46df-9bb3-d0aa7e5d8699}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\scmversion\scmversion.vcxproj">
|
||||
<Project>{075ced82-6a20-46df-94c7-9624ac9ddbeb}</Project>
|
||||
</ProjectReference>
|
||||
|
|
|
@ -60,6 +60,10 @@
|
|||
<ClCompile Include="hotkeys.cpp" />
|
||||
<ClCompile Include="gpu_shadergen.cpp" />
|
||||
<ClCompile Include="pch.cpp" />
|
||||
<ClCompile Include="cpu_newrec_compiler.cpp" />
|
||||
<ClCompile Include="cpu_newrec.cpp" />
|
||||
<ClCompile Include="cpu_newrec_compiler_x64.cpp" />
|
||||
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="types.h" />
|
||||
|
@ -124,5 +128,10 @@
|
|||
<ClInclude Include="shader_cache_version.h" />
|
||||
<ClInclude Include="gpu_shadergen.h" />
|
||||
<ClInclude Include="pch.h" />
|
||||
<ClInclude Include="cpu_newrec.h" />
|
||||
<ClInclude Include="cpu_newrec_compiler.h" />
|
||||
<ClInclude Include="cpu_newrec_private.h" />
|
||||
<ClInclude Include="cpu_newrec_compiler_x64.h" />
|
||||
<ClInclude Include="cpu_newrec_compiler_aarch64.h" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "cpu_core.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_disasm.h"
|
||||
#include "cpu_newrec_private.h"
|
||||
#include "cpu_recompiler_types.h"
|
||||
#include "settings.h"
|
||||
#include "system.h"
|
||||
|
@ -29,7 +30,7 @@ static constexpr u32 RECOMPILE_FRAMES_TO_FALL_BACK_TO_INTERPRETER = 100;
|
|||
static constexpr u32 RECOMPILE_COUNT_TO_FALL_BACK_TO_INTERPRETER = 20;
|
||||
static constexpr u32 INVALIDATE_THRESHOLD_TO_DISABLE_LINKING = 10;
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
|
||||
|
||||
// Currently remapping the code buffer doesn't work in macOS or Haiku.
|
||||
#if !defined(__HAIKU__) && !defined(__APPLE__)
|
||||
|
@ -247,8 +248,8 @@ void Initialize()
|
|||
{
|
||||
Assert(s_blocks.empty());
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
|
||||
if (g_settings.IsUsingAnyRecompiler())
|
||||
{
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
const bool has_buffer = s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage),
|
||||
|
@ -263,7 +264,15 @@ void Initialize()
|
|||
}
|
||||
#endif
|
||||
|
||||
AllocateFastMap();
|
||||
#ifdef ENABLE_NEWREC
|
||||
if (g_settings.IsUsingNewRec())
|
||||
{
|
||||
if (!CPU::NewRec::Initialize())
|
||||
Panic("Failed to initialize newrec");
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
|
@ -297,7 +306,12 @@ void ClearState()
|
|||
|
||||
void Shutdown()
|
||||
{
|
||||
#ifdef ENABLE_NEWREC
|
||||
NewRec::Shutdown();
|
||||
#endif
|
||||
|
||||
ClearState();
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
ShutdownFastmem();
|
||||
FreeFastMap();
|
||||
|
@ -455,6 +469,12 @@ FastMapTable* GetFastMapPointer()
|
|||
break;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_NEWREC
|
||||
case CPUExecutionMode::NewRec:
|
||||
CPU::NewRec::Execute();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
{
|
||||
if (g_settings.gpu_pgxp_enable)
|
||||
|
@ -473,7 +493,7 @@ FastMapTable* GetFastMapPointer()
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
|
||||
|
||||
JitCodeBuffer& GetCodeBuffer()
|
||||
{
|
||||
|
@ -484,16 +504,18 @@ JitCodeBuffer& GetCodeBuffer()
|
|||
|
||||
void Reinitialize()
|
||||
{
|
||||
ClearState();
|
||||
#ifdef ENABLE_NEWREC
|
||||
NewRec::Shutdown();
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
ShutdownFastmem();
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
|
||||
s_code_buffer.Destroy();
|
||||
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
if (g_settings.IsUsingAnyRecompiler())
|
||||
{
|
||||
#ifdef USE_STATIC_CODE_BUFFER
|
||||
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
|
||||
|
@ -507,6 +529,14 @@ void Reinitialize()
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_NEWREC
|
||||
if (g_settings.IsUsingNewRec())
|
||||
{
|
||||
if (!CPU::NewRec::Initialize())
|
||||
Panic("Failed to reinitialize NewRec");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
{
|
||||
|
@ -522,6 +552,15 @@ void Reinitialize()
|
|||
|
||||
void Flush()
|
||||
{
|
||||
#ifdef ENABLE_NEWREC
|
||||
if (g_settings.IsUsingNewRec())
|
||||
{
|
||||
s_code_buffer.Reset();
|
||||
NewRec::Reset();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
ClearState();
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
if (g_settings.IsUsingRecompiler())
|
||||
|
@ -929,6 +968,14 @@ static void InvalidateBlock(CodeBlock* block, bool allow_frame_invalidation)
|
|||
|
||||
void InvalidateBlocksWithPageIndex(u32 page_index)
|
||||
{
|
||||
#ifdef ENABLE_NEWREC
|
||||
if (g_settings.IsUsingNewRec())
|
||||
{
|
||||
NewRec::InvalidateBlocksWithPageNumber(page_index);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
DebugAssert(page_index < Bus::RAM_8MB_CODE_PAGE_COUNT);
|
||||
auto& blocks = m_ram_block_map[page_index];
|
||||
for (CodeBlock* block : blocks)
|
||||
|
@ -941,6 +988,14 @@ void InvalidateBlocksWithPageIndex(u32 page_index)
|
|||
|
||||
void InvalidateAll()
|
||||
{
|
||||
#ifdef ENABLE_NEWREC
|
||||
if (g_settings.IsUsingNewRec())
|
||||
{
|
||||
NewRec::InvalidateAllRAMBlocks();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (auto& it : s_blocks)
|
||||
{
|
||||
CodeBlock* block = it.second;
|
||||
|
|
|
@ -134,7 +134,7 @@ using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer)
|
|||
FastMapTable* GetFastMapPointer();
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
|
||||
JitCodeBuffer& GetCodeBuffer();
|
||||
#endif
|
||||
|
||||
|
|
|
@ -2230,6 +2230,7 @@ void CPU::Execute()
|
|||
{
|
||||
case CPUExecutionMode::Recompiler:
|
||||
case CPUExecutionMode::CachedInterpreter:
|
||||
case CPUExecutionMode::NewRec:
|
||||
CodeCache::Execute();
|
||||
break;
|
||||
|
||||
|
|
|
@ -0,0 +1,953 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#include "cpu_newrec.h"
|
||||
#include "bus.h"
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/log.h"
|
||||
#include "cpu_code_cache.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_newrec_compiler.h"
|
||||
#include "cpu_newrec_private.h"
|
||||
#include "cpu_types.h"
|
||||
#include "settings.h"
|
||||
#include "system.h"
|
||||
#include "util/page_fault_handler.h"
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
Log_SetChannel(CPU::NewRec);
|
||||
|
||||
namespace CPU::NewRec {
|
||||
using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
|
||||
using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
|
||||
|
||||
static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
|
||||
static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
|
||||
static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);
|
||||
|
||||
static void InvalidCodeFunction();
|
||||
|
||||
static void AllocateLUTs();
|
||||
static void ResetLUTs();
|
||||
static void InvalidateBlock(Block* block, BlockState new_state);
|
||||
static void ClearBlocks();
|
||||
static void CompileASMFunctions();
|
||||
|
||||
static u32 ReadBlockInstructions(u32 start_pc);
|
||||
static void FillBlockRegInfo(Block* block);
|
||||
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
|
||||
static void AddBlockToPageList(Block* block);
|
||||
static void BacklinkBlocks(u32 pc, const void* dst);
|
||||
static void UnlinkBlockExits(Block* block);
|
||||
|
||||
static bool InitializeFastmem();
|
||||
static void ShutdownFastmem();
|
||||
static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write);
|
||||
|
||||
// Fast map provides lookup from PC to function
|
||||
// Function pointers are offset so that you don't need to subtract
|
||||
CodeLUTArray g_fast_map;
|
||||
static BlockLUTArray s_block_map;
|
||||
static std::unique_ptr<const void*[]> s_lut_code_pointers;
|
||||
static std::unique_ptr<Block*[]> s_lut_block_pointers;
|
||||
static PageProtectionArray s_page_protection = {};
|
||||
static std::vector<Block*> s_blocks;
|
||||
static BlockLinkMap s_block_links;
|
||||
static bool s_lut_initialized = false;
|
||||
|
||||
// for compiling
|
||||
static std::vector<Instruction> s_block_instructions;
|
||||
|
||||
// fastmem stuff
|
||||
static std::unordered_map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
|
||||
static std::unordered_set<u32> s_fastmem_faulting_pcs;
|
||||
|
||||
NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
|
||||
const void* g_compile_or_revalidate_block;
|
||||
const void* g_discard_and_recompile_block;
|
||||
const void* g_check_events_and_dispatch;
|
||||
const void* g_dispatcher;
|
||||
} // namespace CPU::NewRec
|
||||
|
||||
namespace CPU::NewRec {
|
||||
static constexpr u32 GetLUTTableCount(u32 start, u32 end)
|
||||
{
|
||||
return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
|
||||
}
|
||||
|
||||
static constexpr CPU::NewRec::LUTRangeList GetLUTRanges()
|
||||
{
|
||||
const CPU::NewRec::LUTRangeList ranges = {{
|
||||
{0x00000000, 0x00800000}, // RAM
|
||||
{0x1F000000, 0x1F800000}, // EXP1
|
||||
{0x1FC00000, 0x1FC80000}, // BIOS
|
||||
|
||||
{0x80000000, 0x80800000}, // RAM
|
||||
{0x9F000000, 0x9F800000}, // EXP1
|
||||
{0x9FC00000, 0x9FC80000}, // BIOS
|
||||
|
||||
{0xA0000000, 0xA0800000}, // RAM
|
||||
{0xBF000000, 0xBF800000}, // EXP1
|
||||
{0xBFC00000, 0xBFC80000} // BIOS
|
||||
}};
|
||||
return ranges;
|
||||
}
|
||||
|
||||
static constexpr u32 GetLUTSlotCount(bool include_unreachable)
|
||||
{
|
||||
u32 tables = include_unreachable ? 1 : 0; // unreachable table
|
||||
for (const auto& [start, end] : GetLUTRanges())
|
||||
tables += GetLUTTableCount(start, end);
|
||||
|
||||
return tables * LUT_TABLE_SIZE;
|
||||
}
|
||||
} // namespace CPU::NewRec
|
||||
|
||||
CPU::NewRec::CodeLUT CPU::NewRec::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
|
||||
{
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
|
||||
else
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
|
||||
}
|
||||
|
||||
CPU::NewRec::CodeLUT CPU::NewRec::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
|
||||
{
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
|
||||
else
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
|
||||
}
|
||||
|
||||
CPU::NewRec::CodeLUT CPU::NewRec::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
|
||||
{
|
||||
u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
return reinterpret_cast<const void**>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
|
||||
else
|
||||
return reinterpret_cast<const void**>(fake_byte_ptr + pc);
|
||||
}
|
||||
|
||||
void CPU::NewRec::AllocateLUTs()
|
||||
{
|
||||
constexpr u32 num_code_slots = GetLUTSlotCount(true);
|
||||
constexpr u32 num_block_slots = GetLUTSlotCount(false);
|
||||
|
||||
Assert(!s_lut_code_pointers && !s_lut_block_pointers);
|
||||
s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
|
||||
s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
|
||||
std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);
|
||||
|
||||
CodeLUT code_table_ptr = s_lut_code_pointers.get();
|
||||
Block** block_table_ptr = s_lut_block_pointers.get();
|
||||
CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
|
||||
Block** const block_table_ptr_end = block_table_ptr + num_block_slots;
|
||||
|
||||
// Make the unreachable table jump to the invalid code callback.
|
||||
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
|
||||
code_table_ptr[i] = reinterpret_cast<const void*>(InvalidCodeFunction);
|
||||
|
||||
// Mark everything as unreachable to begin with.
|
||||
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
|
||||
{
|
||||
g_fast_map[i] = EncodeCodeLUTPointer(i, code_table_ptr);
|
||||
s_block_map[i] = nullptr;
|
||||
}
|
||||
code_table_ptr += LUT_TABLE_SIZE;
|
||||
|
||||
// Allocate ranges.
|
||||
for (const auto& [start, end] : GetLUTRanges())
|
||||
{
|
||||
const u32 start_slot = start >> LUT_TABLE_SHIFT;
|
||||
const u32 count = GetLUTTableCount(start, end);
|
||||
for (u32 i = 0; i < count; i++)
|
||||
{
|
||||
const u32 slot = start_slot + i;
|
||||
|
||||
g_fast_map[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
|
||||
code_table_ptr += LUT_TABLE_SIZE;
|
||||
|
||||
s_block_map[slot] = block_table_ptr;
|
||||
block_table_ptr += LUT_TABLE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
Assert(code_table_ptr == code_table_ptr_end);
|
||||
Assert(block_table_ptr == block_table_ptr_end);
|
||||
}
|
||||
|
||||
void CPU::NewRec::ResetLUTs()
|
||||
{
|
||||
if (!s_lut_code_pointers)
|
||||
return;
|
||||
|
||||
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
|
||||
{
|
||||
CodeLUT ptr = DecodeCodeLUTPointer(i, g_fast_map[i]);
|
||||
if (ptr == s_lut_code_pointers.get())
|
||||
continue;
|
||||
|
||||
for (u32 j = 0; j < LUT_TABLE_SIZE; j++)
|
||||
ptr[j] = g_compile_or_revalidate_block;
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::SetFastMap(u32 pc, const void* function)
|
||||
{
|
||||
if (!s_lut_code_pointers)
|
||||
return;
|
||||
|
||||
const u32 table = pc >> LUT_TABLE_SHIFT;
|
||||
CodeLUT encoded_ptr = g_fast_map[table];
|
||||
|
||||
#ifdef _DEBUG
|
||||
const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
|
||||
DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
|
||||
#endif
|
||||
|
||||
*OffsetCodeLUTPointer(encoded_ptr, pc) = function;
|
||||
}
|
||||
|
||||
CPU::NewRec::Block* CPU::NewRec::LookupBlock(u32 pc)
|
||||
{
|
||||
const u32 table = pc >> LUT_TABLE_SHIFT;
|
||||
if (!s_block_map[table])
|
||||
return nullptr;
|
||||
|
||||
const u32 idx = (pc & 0xFFFF) >> 2;
|
||||
return s_block_map[table][idx];
|
||||
}
|
||||
|
||||
CPU::NewRec::Block* CPU::NewRec::CreateBlock(u32 pc)
|
||||
{
|
||||
const u32 size = ReadBlockInstructions(pc);
|
||||
if (size == 0)
|
||||
{
|
||||
Log_ErrorPrintf("Cannot compile block at pc %08X", pc);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const u32 table = pc >> LUT_TABLE_SHIFT;
|
||||
Assert(s_block_map[table]);
|
||||
|
||||
const u32 idx = (pc & 0xFFFF) >> 2;
|
||||
Block* block = s_block_map[table][idx];
|
||||
if (block)
|
||||
{
|
||||
// shouldn't be in the page list.. since we should come here after invalidating
|
||||
Assert(!block->next_block_in_page);
|
||||
|
||||
// if it has the same number of instructions, we can reuse it
|
||||
if (block->size != size)
|
||||
{
|
||||
// this sucks.. hopefully won't happen very often
|
||||
// TODO: allocate max size, allow shrink but not grow
|
||||
auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
|
||||
Assert(it != s_blocks.end());
|
||||
s_blocks.erase(it);
|
||||
|
||||
std::free(block);
|
||||
block = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!block)
|
||||
{
|
||||
block =
|
||||
static_cast<Block*>(std::malloc(sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size)));
|
||||
Assert(block);
|
||||
s_blocks.push_back(block);
|
||||
}
|
||||
|
||||
block->pc = pc;
|
||||
block->size = size;
|
||||
block->host_code = nullptr;
|
||||
block->next_block_in_page = nullptr;
|
||||
block->num_exit_links = 0;
|
||||
block->state = BlockState::Valid;
|
||||
std::memcpy(block->Instructions(), s_block_instructions.data(), sizeof(Instruction) * size);
|
||||
s_block_map[table][idx] = block;
|
||||
|
||||
FillBlockRegInfo(block);
|
||||
|
||||
// add it to the tracking list for its page
|
||||
AddBlockToPageList(block);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
bool CPU::NewRec::RevalidateBlock(Block* block)
|
||||
{
|
||||
DebugAssert(block->state != BlockState::Valid);
|
||||
DebugAssert(BlockInRAM(block->pc));
|
||||
|
||||
if (block->state == BlockState::NeedsRecompile)
|
||||
return false;
|
||||
|
||||
// blocks shouldn't be wrapping..
|
||||
const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
|
||||
DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);
|
||||
|
||||
// can just do a straight memcmp..
|
||||
if (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) != 0)
|
||||
{
|
||||
// changed, needs recompiling
|
||||
Log_DebugPrintf("Block at PC %08X has changed and needs recompiling", block->pc);
|
||||
return false;
|
||||
}
|
||||
|
||||
block->state = BlockState::Valid;
|
||||
AddBlockToPageList(block);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CPU::NewRec::CompileOrRevalidateBlock(u32 start_pc)
|
||||
{
|
||||
// TODO: this doesn't currently handle when the cache overflows...
|
||||
|
||||
Block* block = LookupBlock(start_pc);
|
||||
if (block)
|
||||
{
|
||||
// we should only be here if the block got invalidated
|
||||
DebugAssert(block->state != BlockState::Valid);
|
||||
if (RevalidateBlock(block))
|
||||
{
|
||||
SetFastMap(start_pc, block->host_code);
|
||||
BacklinkBlocks(start_pc, block->host_code);
|
||||
return;
|
||||
}
|
||||
|
||||
// remove outward links from this block, since we're recompiling it
|
||||
UnlinkBlockExits(block);
|
||||
}
|
||||
|
||||
block = CreateBlock(start_pc);
|
||||
if (!block)
|
||||
Panic("Failed to create block, TODO fallback to interpreter");
|
||||
|
||||
block->host_code = g_compiler->CompileBlock(block);
|
||||
if (!block->host_code)
|
||||
{
|
||||
// block failed to compile
|
||||
// TODO: this shouldn't backlink
|
||||
block->host_code = reinterpret_cast<const void*>(&CPU::CodeCache::InterpretUncachedBlock<PGXPMode::Disabled>);
|
||||
Panic("Block failed compilation");
|
||||
}
|
||||
|
||||
SetFastMap(start_pc, block->host_code);
|
||||
BacklinkBlocks(start_pc, block->host_code);
|
||||
}
|
||||
|
||||
void CPU::NewRec::DiscardAndRecompileBlock(u32 start_pc)
|
||||
{
|
||||
Log_DevPrintf("Discard block %08X with manual protection", start_pc);
|
||||
Block* block = LookupBlock(start_pc);
|
||||
DebugAssert(block && block->state == BlockState::Valid);
|
||||
InvalidateBlock(block, BlockState::NeedsRecompile);
|
||||
CompileOrRevalidateBlock(start_pc);
|
||||
}
|
||||
|
||||
void CPU::NewRec::AddBlockToPageList(Block* block)
|
||||
{
|
||||
if (!BlockInRAM(block->pc) || block->next_block_in_page)
|
||||
return;
|
||||
|
||||
// TODO: what about blocks which span more than one page?
|
||||
const u32 page_idx = Bus::GetRAMCodePageIndex(block->pc);
|
||||
PageProtectionInfo& entry = s_page_protection[page_idx];
|
||||
if (entry.mode != PageProtectionMode::WriteProtected)
|
||||
return;
|
||||
|
||||
Bus::SetRAMCodePage(page_idx);
|
||||
|
||||
if (entry.last_block_in_page)
|
||||
{
|
||||
entry.last_block_in_page->next_block_in_page = block;
|
||||
entry.last_block_in_page = block;
|
||||
}
|
||||
else
|
||||
{
|
||||
entry.first_block_in_page = block;
|
||||
entry.last_block_in_page = block;
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::InvalidateBlocksWithPageNumber(u32 index)
|
||||
{
|
||||
DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
|
||||
Bus::ClearRAMCodePage(index);
|
||||
|
||||
BlockState new_block_state = BlockState::Invalidated;
|
||||
PageProtectionInfo& ppi = s_page_protection[index];
|
||||
|
||||
const u32 frame_number = System::GetFrameNumber();
|
||||
const u32 frame_delta = frame_number - ppi.invalidate_frame;
|
||||
ppi.invalidate_count++;
|
||||
|
||||
if (frame_delta >= 10)
|
||||
{
|
||||
ppi.invalidate_count = 1;
|
||||
ppi.invalidate_frame = frame_number;
|
||||
}
|
||||
else if (ppi.invalidate_count > 3)
|
||||
{
|
||||
Log_DevPrintf("%u invalidations to page %u in %u frames, switching to manual protection", ppi.invalidate_count,
|
||||
index, frame_delta);
|
||||
ppi.mode = PageProtectionMode::ManualCheck;
|
||||
new_block_state = BlockState::NeedsRecompile;
|
||||
}
|
||||
|
||||
Block* block = ppi.first_block_in_page;
|
||||
while (block)
|
||||
{
|
||||
InvalidateBlock(block, new_block_state);
|
||||
|
||||
Block* next_block = block->next_block_in_page;
|
||||
block->next_block_in_page = nullptr;
|
||||
block = next_block;
|
||||
}
|
||||
|
||||
ppi.first_block_in_page = nullptr;
|
||||
ppi.last_block_in_page = nullptr;
|
||||
}
|
||||
|
||||
CPU::NewRec::PageProtectionMode CPU::NewRec::GetProtectionModeForBlock(Block* block)
|
||||
{
|
||||
if (!BlockInRAM(block->pc))
|
||||
return PageProtectionMode::Unprotected;
|
||||
|
||||
const u32 page_idx = Bus::GetRAMCodePageIndex(block->pc);
|
||||
const PageProtectionInfo& ppi = s_page_protection[page_idx];
|
||||
return ppi.mode;
|
||||
}
|
||||
|
||||
u32 CPU::NewRec::CreateBlockLink(Block* block, void* code, u32 newpc)
|
||||
{
|
||||
const void* dst = g_dispatcher;
|
||||
if (g_settings.cpu_recompiler_block_linking)
|
||||
{
|
||||
const Block* next_block = LookupBlock(newpc);
|
||||
dst =
|
||||
(next_block && next_block->state == BlockState::Valid) ? next_block->host_code : g_compile_or_revalidate_block;
|
||||
|
||||
BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
|
||||
DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
|
||||
block->exit_links[block->num_exit_links++] = iter;
|
||||
}
|
||||
|
||||
Log_DebugPrintf("Linking %p with dst pc %08X to %p%s", code, newpc, dst,
|
||||
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
|
||||
return EmitJump(code, dst, false);
|
||||
}
|
||||
|
||||
void CPU::NewRec::BacklinkBlocks(u32 pc, const void* dst)
|
||||
{
|
||||
if (!g_settings.cpu_recompiler_block_linking)
|
||||
return;
|
||||
|
||||
const auto link_range = s_block_links.equal_range(pc);
|
||||
for (auto it = link_range.first; it != link_range.second; ++it)
|
||||
{
|
||||
Log_DebugPrintf("Backlinking %p with dst pc %08X to %p%s", it->second, pc, dst,
|
||||
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
|
||||
EmitJump(it->second, dst, true);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::UnlinkBlockExits(Block* block)
|
||||
{
|
||||
for (u32 i = 0; i < block->num_exit_links; i++)
|
||||
s_block_links.erase(block->exit_links[i]);
|
||||
block->num_exit_links = 0;
|
||||
}
|
||||
|
||||
void CPU::NewRec::InvalidCodeFunction()
|
||||
{
|
||||
Panic("fixme");
|
||||
}
|
||||
|
||||
void CPU::NewRec::CompileASMFunctions()
|
||||
{
|
||||
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
|
||||
DebugAssert(buffer.GetTotalUsed() == 0);
|
||||
const u32 asm_size = CompileASMFunctions(buffer.GetFreeCodePointer(), buffer.GetFreeCodeSpace());
|
||||
Log_ProfilePrintf("ASM functions generated %u bytes of host code", asm_size);
|
||||
buffer.CommitCode(asm_size);
|
||||
}
|
||||
|
||||
bool CPU::NewRec::Initialize()
|
||||
{
|
||||
if (!s_lut_initialized)
|
||||
{
|
||||
s_lut_initialized = true;
|
||||
AllocateLUTs();
|
||||
}
|
||||
|
||||
CompileASMFunctions();
|
||||
ResetLUTs();
|
||||
|
||||
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CPU::NewRec::Shutdown()
|
||||
{
|
||||
if (!s_lut_initialized)
|
||||
return;
|
||||
|
||||
ClearBlocks();
|
||||
ShutdownFastmem();
|
||||
}
|
||||
|
||||
[[noreturn]] void CPU::NewRec::Execute()
|
||||
{
|
||||
g_enter_recompiler();
|
||||
}
|
||||
|
||||
void CPU::NewRec::InvalidateBlock(Block* block, BlockState new_state)
|
||||
{
|
||||
if (block->state == BlockState::Valid)
|
||||
{
|
||||
SetFastMap(block->pc, g_compile_or_revalidate_block);
|
||||
BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
|
||||
}
|
||||
|
||||
block->state = new_state;
|
||||
}
|
||||
|
||||
void CPU::NewRec::InvalidateAllRAMBlocks()
|
||||
{
|
||||
// TODO: maybe combine the backlink into one big instruction flush cache?
|
||||
|
||||
for (Block* block : s_blocks)
|
||||
{
|
||||
if (BlockInRAM(block->pc))
|
||||
InvalidateBlock(block, BlockState::Invalidated);
|
||||
}
|
||||
}
|
||||
|
||||
void CPU::NewRec::ClearBlocks()
|
||||
{
|
||||
for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
|
||||
{
|
||||
PageProtectionInfo& ppi = s_page_protection[i];
|
||||
if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
|
||||
Bus::ClearRAMCodePage(i);
|
||||
|
||||
ppi = {};
|
||||
}
|
||||
|
||||
s_fastmem_backpatch_info.clear();
|
||||
s_fastmem_faulting_pcs.clear();
|
||||
s_block_links.clear();
|
||||
for (Block* block : s_blocks)
|
||||
std::free(block);
|
||||
s_blocks.clear();
|
||||
|
||||
std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
|
||||
}
|
||||
|
||||
void CPU::NewRec::Reset()
|
||||
{
|
||||
ClearBlocks();
|
||||
CompileASMFunctions();
|
||||
ResetLUTs();
|
||||
|
||||
if (g_settings.IsUsingFastmem())
|
||||
CPU::UpdateMemoryPointers();
|
||||
}
|
||||
|
||||
bool CPU::NewRec::InitializeFastmem()
|
||||
{
|
||||
const CPUFastmemMode mode = g_settings.cpu_fastmem_mode;
|
||||
Assert(mode == CPUFastmemMode::MMap);
|
||||
|
||||
JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
|
||||
if (!Common::PageFaultHandler::InstallHandler(&g_fast_map, buffer.GetCodePointer(), buffer.GetTotalSize(),
|
||||
&PageFaultHandler))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to install page fault handler");
|
||||
return false;
|
||||
}
|
||||
|
||||
Bus::UpdateFastmemViews(mode);
|
||||
CPU::UpdateMemoryPointers();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tears down fastmem: removes the page fault handler, unmaps the fastmem
// views, and refreshes the CPU state's memory pointers.
void CPU::NewRec::ShutdownFastmem()
{
  Common::PageFaultHandler::RemoveHandler(&g_fast_map);
  Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
  CPU::UpdateMemoryPointers();
}
|
||||
|
||||
void CPU::NewRec::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
|
||||
bool is_load)
|
||||
{
|
||||
DebugAssert(code_size < std::numeric_limits<u8>::max());
|
||||
DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
|
||||
|
||||
auto iter = s_fastmem_backpatch_info.find(code_address);
|
||||
if (iter != s_fastmem_backpatch_info.end())
|
||||
s_fastmem_backpatch_info.erase(iter);
|
||||
|
||||
const LoadstoreBackpatchInfo info{
|
||||
guest_pc, gpr_bitmask, static_cast<u16>(cycles), address_register, data_register, static_cast<u16>(size),
|
||||
is_signed, is_load, static_cast<u8>(code_size), static_cast<u8>(0)};
|
||||
s_fastmem_backpatch_info.emplace(code_address, info);
|
||||
}
|
||||
|
||||
// Fault handler for fastmem accesses. Decides whether a fault belongs to the
// fastmem arena, and if so either tolerates it (RAM self-modifying-code
// writes, up to a limit) or backpatches the faulting load/store into a
// slow-path thunk. Returns ExecuteNextHandler for faults we don't own.
Common::PageFaultHandler::HandlerResult CPU::NewRec::PageFaultHandler(void* exception_pc, void* fault_address,
                                                                      bool is_write)
{
  // Not inside the fastmem arena => not our fault to handle.
  if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
      (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
        static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
  {
    return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
  }

  // Offset within the arena is the guest physical address.
  const PhysicalMemoryAddress guest_address = static_cast<PhysicalMemoryAddress>(
    static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));

  Log_DevPrintf("Page fault handler invoked at PC=%p Address=%p %s, fastmem offset 0x%08X", exception_pc, fault_address,
                is_write ? "(write)" : "(read)", guest_address);

  // We can only recover if the faulting host instruction was registered via
  // AddLoadStoreInfo() when it was emitted.
  auto iter = s_fastmem_backpatch_info.find(exception_pc);
  if (iter == s_fastmem_backpatch_info.end())
  {
    Log_ErrorPrintf("No backpatch info found for %p", exception_pc);
    return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler;
  }

  // if we're writing to ram, let it go through a few times, and use manual block protection to sort it out
  LoadstoreBackpatchInfo& info = iter->second;
  if (is_write && !g_state.cop0_regs.sr.Isc && Bus::IsRAMAddress(guest_address) && info.fault_count < 10)
  {
    Log_DevPrintf("Ignoring fault due to RAM write");
    InvalidateBlocksWithPageNumber(Bus::GetRAMCodePageIndex(guest_address));
    info.fault_count++;
    return Common::PageFaultHandler::HandlerResult::ContinueExecution;
  }

  Log_DevPrintf("Backpatching %s at %p[%u] (pc %08X addr %08X): Bitmask %08X Addr %u Data %u Size %u Signed %02X",
                info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address,
                info.gpr_bitmask, info.address_register, info.data_register, info.AccessSizeInBytes(), info.is_signed);

  // remove the cycles we added for the memory read, then take them off again after the backpatch
  // the normal rec path will add the ram read ticks later, so we need to take them off at the end
  DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS);
  const TickCount cycles_to_add =
    static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
  const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));

  // Emit the slow-path thunk into far code and patch the faulting site to
  // branch to it.
  JitCodeBuffer& buffer = CodeCache::GetCodeBuffer();
  const u32 thunk_size =
    BackpatchLoadStore(buffer.GetFreeFarCodePointer(), buffer.GetFreeFarCodeSpace(), exception_pc, info.code_size,
                       cycles_to_add, cycles_to_remove, info.gpr_bitmask, info.address_register, info.data_register,
                       info.AccessSize(), info.is_signed, info.is_load);
  buffer.CommitFarCode(thunk_size);

  // TODO: queue block for recompilation later

  // and store the pc in the faulting list, so that we don't emit another fastmem loadstore
  s_fastmem_faulting_pcs.insert(info.guest_pc);
  s_fastmem_backpatch_info.erase(iter);
  return Common::PageFaultHandler::HandlerResult::ContinueExecution;
}
|
||||
|
||||
// TODO: move this into the compiler
|
||||
|
||||
// Reads guest instructions starting at start_pc into s_block_instructions,
// stopping after a branch's delay slot, at a block-exiting instruction, or
// when an instruction cannot be read. Returns the number of instructions
// collected.
u32 CPU::NewRec::ReadBlockInstructions(u32 start_pc)
{
  u32 pc = start_pc;
  bool is_branch = false;
  bool is_branch_delay = false;

  // TODO: Jump to other block if it exists at this pc?

  s_block_instructions.clear();

  for (;;)
  {
    Instruction i;
    // NOTE(review): `!IsInvalidInstruction(i)` terminates the block on every
    // *valid* instruction, which would cap blocks at one instruction. Looks
    // like a double-negation slip (intended: stop when the instruction IS
    // invalid) — confirm against upstream before changing.
    if (!SafeReadInstruction(pc, &i.bits) || !IsInvalidInstruction(i))
      break;

    // Track branch/delay-slot state so we include exactly one delay slot
    // after a branch before ending the block.
    is_branch_delay = is_branch;
    is_branch = IsBranchInstruction(i);
    s_block_instructions.push_back(i);
    pc += sizeof(Instruction);

    if (is_branch_delay)
      break;

    if (IsExitBlockInstruction(i))
      break;
  }

  return static_cast<u32>(s_block_instructions.size());
}
|
||||
|
||||
// Records a register access on an InstructionInfo during liveness analysis.
// Reads are appended to the first free (zero) slot of read_reg; writes are
// currently ignored (the write_reg tracking below is compiled out).
void CPU::NewRec::SetRegAccess(InstructionInfo* inst, Reg reg, bool write)
{
  // $zero is never meaningful to track, and Reg::zero doubles as the
  // "empty slot" sentinel in read_reg below.
  if (reg == Reg::zero)
    return;

  if (!write)
  {
    // Find the first unused slot; silently drops the access if all slots
    // are occupied.
    for (u32 i = 0; i < std::size(inst->read_reg); i++)
    {
      if (inst->read_reg[i] == Reg::zero)
      {
        inst->read_reg[i] = reg;
        break;
      }
    }
  }
  else
  {
#if 0
    for (u32 i = 0; i < std::size(inst->write_reg); i++)
    {
      if (inst->write_reg[i] == Reg::zero)
      {
        inst->write_reg[i] = reg;
        break;
      }
    }
#endif
  }
}
|
||||
|
||||
// Liveness backpropagation helper (walks instructions last-to-first):
// marks `reg` as read by the current instruction. Flags the first time the
// register is seen (scanning backwards) as its last use, marks it live and
// used in the previous instruction's state, and records it via SetRegAccess.
// Expects `inst` and `prev` InstructionInfo* in the enclosing scope.
#define BackpropSetReads(reg)                                                                                          \
  do                                                                                                                   \
  {                                                                                                                    \
    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED))                                                            \
      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE;                                                             \
    prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED;                                                        \
    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED;                                                                  \
    SetRegAccess(inst, reg, false);                                                                                    \
  } while (0)
|
||||
|
||||
// Liveness backpropagation helper: marks `reg` as written by the current
// instruction. The write kills liveness/usage in the earlier (prev)
// instruction's state; the first backwards sighting is still the last use.
// Expects `inst` and `prev` InstructionInfo* in the enclosing scope.
#define BackpropSetWrites(reg)                                                                                         \
  do                                                                                                                   \
  {                                                                                                                    \
    prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED);                                                     \
    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED))                                                            \
      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE;                                                             \
    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED;                                                                  \
    SetRegAccess(inst, reg, true);                                                                                     \
  } while (0)
|
||||
|
||||
// Write marker for delayed writes (memory loads, mfc/cfc): currently an
// alias for BackpropSetWrites.
// TODO: memory loads should be delayed one instruction because of stupid load delays.
#define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)
|
||||
|
||||
// Computes per-instruction register liveness for a block via a single
// backwards pass. The last instruction starts with all registers live; each
// earlier instruction inherits the successor's state and then applies its
// own reads/writes through the BackpropSetReads/Writes macros.
//
// Fixes vs. original: the default case now logs the opcode (iinst->op)
// instead of the funct field, and the lwc2/swc2/default case labels have
// been moved out of the cop0/cop2 case's compound statement (they were
// nested inside its braces — legal, but misleading; behavior is unchanged).
void CPU::NewRec::FillBlockRegInfo(Block* block)
{
  // Walk instructions and their info in lockstep, last to first.
  const Instruction* iinst = block->Instructions() + (block->size - 1);
  InstructionInfo* const start = block->InstructionsInfo();
  InstructionInfo* inst = start + (block->size - 1);

  // Everything is assumed live after the block ends.
  std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
  std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
  // std::memset(inst->write_reg, 0, sizeof(inst->write_reg));

  while (inst != start)
  {
    // Seed the previous instruction's state from this one, then let the
    // Backprop* macros adjust it for this instruction's accesses.
    InstructionInfo* prev = inst - 1;
    std::memcpy(prev, inst, sizeof(InstructionInfo));

    const Reg rs = iinst->r.rs;
    const Reg rt = iinst->r.rt;

    switch (iinst->op)
    {
      case InstructionOp::funct:
      {
        const Reg rd = iinst->r.rd;

        switch (iinst->r.funct)
        {
          case InstructionFunct::sll:
          case InstructionFunct::srl:
          case InstructionFunct::sra:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::sllv:
          case InstructionFunct::srlv:
          case InstructionFunct::srav:
          case InstructionFunct::add:
          case InstructionFunct::addu:
          case InstructionFunct::sub:
          case InstructionFunct::subu:
          case InstructionFunct::and_:
          case InstructionFunct::or_:
          case InstructionFunct::xor_:
          case InstructionFunct::nor:
          case InstructionFunct::slt:
          case InstructionFunct::sltu:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jr:
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jalr:
            BackpropSetReads(rs);
            BackpropSetWrites(rd);
            break;

          case InstructionFunct::mfhi:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::hi);
            break;

          case InstructionFunct::mflo:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::lo);
            break;

          case InstructionFunct::mthi:
            BackpropSetWrites(Reg::hi);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mtlo:
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mult:
          case InstructionFunct::multu:
          case InstructionFunct::div:
          case InstructionFunct::divu:
            BackpropSetWrites(Reg::hi);
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::syscall:
          case InstructionFunct::break_:
            // No GPR accesses to track.
            break;

          default:
            Log_ErrorPrintf("Unknown funct %u", static_cast<u32>(iinst->r.funct.GetValue()));
            break;
        }
      }
      break;

      case InstructionOp::b:
      {
        // bltzal/bgezal (rt bits 1E == 10) link into $ra.
        if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
          BackpropSetWrites(Reg::ra);
        BackpropSetReads(rs);
      }
      break;

      case InstructionOp::j:
        break;

      case InstructionOp::jal:
        BackpropSetWrites(Reg::ra);
        break;

      case InstructionOp::beq:
      case InstructionOp::bne:
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::blez:
      case InstructionOp::bgtz:
        BackpropSetReads(rs);
        break;

      case InstructionOp::addi:
      case InstructionOp::addiu:
      case InstructionOp::slti:
      case InstructionOp::sltiu:
      case InstructionOp::andi:
      case InstructionOp::ori:
      case InstructionOp::xori:
        BackpropSetWrites(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lui:
        BackpropSetWrites(rt);
        break;

      case InstructionOp::lb:
      case InstructionOp::lh:
      case InstructionOp::lw:
      case InstructionOp::lbu:
      case InstructionOp::lhu:
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lwl:
      case InstructionOp::lwr:
        // Unaligned loads merge with the existing rt value, so rt is both
        // read and (delayed-)written.
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::sb:
      case InstructionOp::sh:
      case InstructionOp::swl:
      case InstructionOp::sw:
      case InstructionOp::swr:
        BackpropSetReads(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::cop0:
      case InstructionOp::cop2:
      {
        // Only the common mfc/cfc/mtc/ctc forms touch GPRs.
        if (iinst->cop.IsCommonInstruction())
        {
          switch (iinst->cop.CommonOp())
          {
            case CopCommonInstruction::mfcn:
            case CopCommonInstruction::cfcn:
              BackpropSetWritesDelayed(rt);
              break;

            case CopCommonInstruction::mtcn:
            case CopCommonInstruction::ctcn:
              BackpropSetReads(rt);
              break;

            default:
              break;
          }
        }
      }
      break;

      case InstructionOp::lwc2:
      case InstructionOp::swc2:
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      default:
        // Log the opcode field (the original logged the funct field here).
        Log_ErrorPrintf("Unknown op %u", static_cast<u32>(iinst->op.GetValue()));
        break;
    } // end switch

    inst--;
    iinst--;
  } // end while
}
|
|
@ -0,0 +1,19 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#pragma once
#include "types.h"

#ifdef ENABLE_NEWREC

// Public entry points for the experimental "new" recompiler backend.
namespace CPU::NewRec
{
// One-time setup of the code cache and dispatcher. Returns false on failure.
bool Initialize();
// Drops all compiled code and rebuilds dispatcher state.
void Reset();
// Releases all recompiler resources.
void Shutdown();
// Enters the dispatch loop; does not return (exits via longjmp/exception).
[[noreturn]] void Execute();
// Invalidates every block whose source lies in RAM.
void InvalidateAllRAMBlocks();
// Invalidates all blocks overlapping the given RAM code page.
void InvalidateBlocksWithPageNumber(u32 index);
}

#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,458 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#pragma once
#include "cpu_newrec.h"
#include "cpu_newrec_private.h"
#include "cpu_types.h"
#include <array>
#include <bitset>
#include <optional>
#include <utility>
#include <vector>

namespace CPU::NewRec {

// Global options
static constexpr bool EMULATE_LOAD_DELAYS = true;
static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;

// Arch-specific options
#if defined(CPU_ARCH_X64)
static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = true;
#elif defined(CPU_ARCH_ARM64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#elif defined(CPU_ARCH_RISCV64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#endif

// Architecture-independent base of the new recompiler. Owns constant
// propagation, host register allocation, load-delay tracking, and the
// per-opcode compile dispatch; backends implement the pure-virtual
// emitters (Compile_*, Load/Store helpers, block prologue/epilogue).
class Compiler
{
public:
  Compiler();
  virtual ~Compiler();

  // Compiles `block` and returns the entry point of the generated code.
  const void* CompileBlock(Block* block);

protected:
  // What Flush() should write back / release before a given operation.
  enum FlushFlags : u32
  {
    FLUSH_FLUSH_MIPS_REGISTERS = (1 << 0),
    FLUSH_INVALIDATE_MIPS_REGISTERS = (1 << 1),
    FLUSH_FREE_CALLER_SAVED_REGISTERS = (1 << 2),
    FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS = (1 << 3),
    FLUSH_FREE_ALL_REGISTERS = (1 << 4),
    FLUSH_PC = (1 << 5),
    FLUSH_INSTRUCTION_BITS = (1 << 6),
    FLUSH_CYCLES = (1 << 7),
    FLUSH_LOAD_DELAY = (1 << 8),
    FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9),
    FLUSH_GTE_DONE_CYCLE = (1 << 10),
    FLUSH_GTE_STALL_FROM_STATE = (1 << 11),

    // Common combinations for the situations the compiler flushes in.
    FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS),
    FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE),
    FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS),
    FLUSH_FOR_EXCEPTION =
      (FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next.
    FLUSH_FOR_INTERPRETER =
      (FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC |
       FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE),
    FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS |
                                      FLUSH_GTE_STALL_FROM_STATE),
  };

  // Per-instruction snapshot of where the S/T/D/LO/HI operands live
  // (constant, host register, or neither), packed into 64 bits and passed
  // to the Compile_* handlers.
  union CompileFlags
  {
    struct
    {
      u32 const_s : 1;  // S is constant
      u32 const_t : 1;  // T is constant
      u32 const_lo : 1; // LO is constant
      u32 const_hi : 1; // HI is constant

      u32 valid_host_d : 1;  // D is valid in host register
      u32 valid_host_s : 1;  // S is valid in host register
      u32 valid_host_t : 1;  // T is valid in host register
      u32 valid_host_lo : 1; // LO is valid in host register
      u32 valid_host_hi : 1; // HI is valid in host register

      u32 host_d : 5;  // D host register
      u32 host_s : 5;  // S host register
      u32 host_t : 5;  // T host register
      u32 host_lo : 5; // LO host register

      u32 delay_slot_swapped : 1;
      u32 pad1 : 2; // 28..31

      u32 host_hi : 5; // HI host register

      u32 mips_s : 5; // S guest register
      u32 mips_t : 5; // T guest register

      // NOTE(review): host_hi+mips_s+mips_t+pad2 sums to 30 bits, not the
      // 32 this comment suggests — harmless (the union is padded to u64),
      // but confirm the intended layout.
      u32 pad2 : 15; // 32 bits
    };

    u64 bits;

    ALWAYS_INLINE Reg MipsS() const { return static_cast<Reg>(mips_s); }
    ALWAYS_INLINE Reg MipsT() const { return static_cast<Reg>(mips_t); }
  };
  static_assert(sizeof(CompileFlags) == sizeof(u64));

  // Describes an instruction's operand behavior to CompileTemplate().
  enum TemplateFlag : u32
  {
    TF_READS_S = (1 << 0),
    TF_READS_T = (1 << 1),
    TF_READS_LO = (1 << 2),
    TF_READS_HI = (1 << 3),
    TF_WRITES_D = (1 << 4),
    TF_WRITES_T = (1 << 5),
    TF_WRITES_LO = (1 << 6),
    TF_WRITES_HI = (1 << 7),
    TF_COMMUTATIVE = (1 << 8), // S op T == T op S
    TF_CAN_OVERFLOW = (1 << 9),

    // TF_NORENAME = // TODO
    TF_LOAD_DELAY = (1 << 10),
    TF_GTE_STALL = (1 << 11),

    TF_NO_NOP = (1 << 12),
    TF_NEEDS_REG_S = (1 << 13),
    TF_NEEDS_REG_T = (1 << 14),
    TF_CAN_SWAP_DELAY_SLOT = (1 << 15),

    TF_RENAME_WITH_ZERO_T = (1 << 16), // add commutative for S as well
    TF_RENAME_WITH_ZERO_IMM = (1 << 17),

    TF_PGXP_WITHOUT_CPU = (1 << 18),
  };

  // State bits for each host register in the allocator.
  enum HostRegFlags : u8
  {
    HR_ALLOCATED = (1 << 0),
    HR_NEEDED = (1 << 1),
    HR_MODE_READ = (1 << 2),  // valid
    HR_MODE_WRITE = (1 << 3), // dirty

    HR_USABLE = (1 << 7),
    HR_CALLEE_SAVED = (1 << 6),

    ALLOWED_HR_FLAGS = HR_MODE_READ | HR_MODE_WRITE,
    IMMUTABLE_HR_FLAGS = HR_USABLE | HR_CALLEE_SAVED,
  };

  // What a host register is currently holding.
  enum HostRegAllocType : u8
  {
    HR_TYPE_TEMP,
    HR_TYPE_CPU_REG,
    HR_TYPE_PC_WRITEBACK,
    HR_TYPE_LOAD_DELAY_VALUE,
    HR_TYPE_NEXT_LOAD_DELAY_VALUE,
  };

  // Per-host-register allocator record; `counter` is the LRU stamp.
  struct HostRegAlloc
  {
    u8 flags;
    HostRegAllocType type;
    Reg reg;
    u16 counter;
  };

  enum class BranchCondition : u8
  {
    Equal,
    NotEqual,
    GreaterThanZero,
    GreaterEqualZero,
    LessThanZero,
    LessEqualZero,
  };

  // --- Constant (known-value) guest register tracking ---
  ALWAYS_INLINE bool HasConstantReg(Reg r) const { return m_constant_regs_valid.test(static_cast<u32>(r)); }
  ALWAYS_INLINE bool HasDirtyConstantReg(Reg r) const { return m_constant_regs_dirty.test(static_cast<u32>(r)); }
  ALWAYS_INLINE bool HasConstantRegValue(Reg r, u32 val) const
  {
    return m_constant_regs_valid.test(static_cast<u32>(r)) && m_constant_reg_values[static_cast<u32>(r)] == val;
  }
  ALWAYS_INLINE u32 GetConstantRegU32(Reg r) const { return m_constant_reg_values[static_cast<u32>(r)]; }
  ALWAYS_INLINE s32 GetConstantRegS32(Reg r) const
  {
    return static_cast<s32>(m_constant_reg_values[static_cast<u32>(r)]);
  }
  void SetConstantReg(Reg r, u32 v);
  void ClearConstantReg(Reg r);
  void FlushConstantReg(Reg r);
  void FlushConstantRegs(bool invalidate);

  Reg MipsD() const;
  u32 GetConditionalBranchTarget(CompileFlags cf) const;
  u32 GetBranchReturnAddress(CompileFlags cf) const;
  bool TrySwapDelaySlot(Reg rs = Reg::zero, Reg rt = Reg::zero, Reg rd = Reg::zero);
  void SetCompilerPC(u32 newpc);

  virtual void DisassembleAndLog(const void* start, u32 size) = 0;
  virtual u32 GetHostInstructionCount(const void* start, u32 size) = 0;

  virtual const void* GetCurrentCodePointer() = 0;

  // --- Block compilation lifecycle (backend hooks) ---
  virtual void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space);
  virtual void BeginBlock();
  virtual void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) = 0;
  virtual void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) = 0;
  virtual void EndBlock(const std::optional<u32>& newpc) = 0;
  virtual void EndBlockWithException(Exception excode) = 0;
  virtual const void* EndCompile(u32* code_size, u32* far_code_size) = 0;

  // --- Host register allocation ---
  ALWAYS_INLINE bool IsHostRegAllocated(u32 r) const { return (m_host_regs[r].flags & HR_ALLOCATED) != 0; }
  static const char* GetReadWriteModeString(u32 flags);
  virtual const char* GetHostRegName(u32 reg) const = 0;
  u32 GetFreeHostReg(u32 flags);
  u32 AllocateHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
  std::optional<u32> CheckHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
  u32 AllocateTempHostReg(u32 flags = 0);
  void SwapHostRegAlloc(u32 lhs, u32 rhs);
  void FlushHostReg(u32 reg);
  void FreeHostReg(u32 reg);
  void ClearHostReg(u32 reg);
  void MarkRegsNeeded(HostRegAllocType type, Reg reg);
  void RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg);
  void ClearHostRegNeeded(u32 reg);
  void ClearHostRegsNeeded();
  void DeleteMIPSReg(Reg reg, bool flush);
  bool TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other);
  void UpdateHostRegCounters();

  // --- Backend primitives for moving values between host/CPU state ---
  virtual void LoadHostRegWithConstant(u32 reg, u32 val) = 0;
  virtual void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) = 0;
  virtual void StoreConstantToCPUPointer(u32 val, const void* ptr) = 0;
  virtual void StoreHostRegToCPUPointer(u32 reg, const void* ptr) = 0;
  virtual void CopyHostReg(u32 dst, u32 src) = 0;
  virtual void Flush(u32 flags);

  /// Returns true if there is a load delay which will be stored at the end of the instruction.
  bool HasLoadDelay() const { return m_load_delay_register != Reg::count; }

  /// Cancels any pending load delay to the specified register.
  void CancelLoadDelaysToReg(Reg reg);

  /// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
  void UpdateLoadDelay();

  /// Flushes the load delay, i.e. writes it to the destination register.
  void FinishLoadDelay();

  /// Flushes the load delay, but only if it matches the specified register.
  void FinishLoadDelayToReg(Reg reg);

  /// Uses a caller-saved register for load delays when PGXP is enabled.
  u32 GetFlagsForNewLoadDelayedReg() const;

  void BackupHostState();
  void RestoreHostState();

  /// Registers loadstore for possible backpatching.
  void AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register,
                        MemoryAccessSize size, bool is_signed, bool is_load);

  void CompileInstruction();
  void CompileBranchDelaySlot(bool dirty_pc = true);

  // Dispatches one instruction through const/host variants based on
  // operand state; tflags is a TemplateFlag combination.
  void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags),
                       const void* pgxp_cpu_func, u32 tflags);
  void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
                                                       const std::optional<VirtualMemoryAddress>&),
                                MemoryAccessSize size, bool store, bool sign, u32 tflags);
  void CompileMoveRegTemplate(Reg dst, Reg src);

  virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
                                            Reg arg3reg = Reg::count) = 0;

  // Falls back to the interpreter for one instruction.
  virtual void Compile_Fallback() = 0;

  // --- Per-opcode handlers: _const variants run when operands are known
  // constants; the pure-virtual variants are emitted by the backend. ---
  void Compile_j();
  virtual void Compile_jr(CompileFlags cf) = 0;
  void Compile_jr_const(CompileFlags cf);
  void Compile_jal();
  virtual void Compile_jalr(CompileFlags cf) = 0;
  void Compile_jalr_const(CompileFlags cf);
  void Compile_syscall();
  void Compile_break();

  void Compile_b_const(CompileFlags cf);
  void Compile_b(CompileFlags cf);
  void Compile_blez(CompileFlags cf);
  void Compile_blez_const(CompileFlags cf);
  void Compile_bgtz(CompileFlags cf);
  void Compile_bgtz_const(CompileFlags cf);
  void Compile_beq(CompileFlags cf);
  void Compile_beq_const(CompileFlags cf);
  void Compile_bne(CompileFlags cf);
  void Compile_bne_const(CompileFlags cf);
  virtual void Compile_bxx(CompileFlags cf, BranchCondition cond) = 0;
  void Compile_bxx_const(CompileFlags cf, BranchCondition cond);

  void Compile_sll_const(CompileFlags cf);
  virtual void Compile_sll(CompileFlags cf) = 0;
  void Compile_srl_const(CompileFlags cf);
  virtual void Compile_srl(CompileFlags cf) = 0;
  void Compile_sra_const(CompileFlags cf);
  virtual void Compile_sra(CompileFlags cf) = 0;
  void Compile_sllv_const(CompileFlags cf);
  virtual void Compile_sllv(CompileFlags cf) = 0;
  void Compile_srlv_const(CompileFlags cf);
  virtual void Compile_srlv(CompileFlags cf) = 0;
  void Compile_srav_const(CompileFlags cf);
  virtual void Compile_srav(CompileFlags cf) = 0;
  void Compile_mult_const(CompileFlags cf);
  virtual void Compile_mult(CompileFlags cf) = 0;
  void Compile_multu_const(CompileFlags cf);
  virtual void Compile_multu(CompileFlags cf) = 0;
  void Compile_div_const(CompileFlags cf);
  virtual void Compile_div(CompileFlags cf) = 0;
  void Compile_divu_const(CompileFlags cf);
  virtual void Compile_divu(CompileFlags cf) = 0;
  void Compile_add_const(CompileFlags cf);
  virtual void Compile_add(CompileFlags cf) = 0;
  void Compile_addu_const(CompileFlags cf);
  virtual void Compile_addu(CompileFlags cf) = 0;
  void Compile_sub_const(CompileFlags cf);
  virtual void Compile_sub(CompileFlags cf) = 0;
  void Compile_subu_const(CompileFlags cf);
  virtual void Compile_subu(CompileFlags cf) = 0;
  void Compile_and_const(CompileFlags cf);
  virtual void Compile_and(CompileFlags cf) = 0;
  void Compile_or_const(CompileFlags cf);
  virtual void Compile_or(CompileFlags cf) = 0;
  void Compile_xor_const(CompileFlags cf);
  virtual void Compile_xor(CompileFlags cf) = 0;
  void Compile_nor_const(CompileFlags cf);
  virtual void Compile_nor(CompileFlags cf) = 0;
  void Compile_slt_const(CompileFlags cf);
  virtual void Compile_slt(CompileFlags cf) = 0;
  void Compile_sltu_const(CompileFlags cf);
  virtual void Compile_sltu(CompileFlags cf) = 0;

  void Compile_addi_const(CompileFlags cf);
  virtual void Compile_addi(CompileFlags cf) = 0;
  void Compile_addiu_const(CompileFlags cf);
  virtual void Compile_addiu(CompileFlags cf) = 0;
  void Compile_slti_const(CompileFlags cf);
  virtual void Compile_slti(CompileFlags cf) = 0;
  void Compile_sltiu_const(CompileFlags cf);
  virtual void Compile_sltiu(CompileFlags cf) = 0;
  void Compile_andi_const(CompileFlags cf);
  virtual void Compile_andi(CompileFlags cf) = 0;
  void Compile_ori_const(CompileFlags cf);
  virtual void Compile_ori(CompileFlags cf) = 0;
  void Compile_xori_const(CompileFlags cf);
  virtual void Compile_xori(CompileFlags cf) = 0;
  void Compile_lui();

  virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
                           const std::optional<VirtualMemoryAddress>& address) = 0;
  virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
                           const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr
  virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
                            const std::optional<VirtualMemoryAddress>& address) = 0;
  virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
                           const std::optional<VirtualMemoryAddress>& address) = 0;
  virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
                           const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr
  virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
                            const std::optional<VirtualMemoryAddress>& address) = 0;

  static u32* GetCop0RegPtr(Cop0Reg reg);
  static u32 GetCop0RegWriteMask(Cop0Reg reg);

  void Compile_mfc0(CompileFlags cf);
  virtual void Compile_mtc0(CompileFlags cf) = 0;
  virtual void Compile_rfe(CompileFlags cf) = 0;

  void AddGTETicks(TickCount ticks);
  void StallUntilGTEComplete();
  virtual void Compile_mfc2(CompileFlags cf) = 0;
  virtual void Compile_mtc2(CompileFlags cf) = 0;
  virtual void Compile_cop2(CompileFlags cf) = 0;

  // How a given GTE register must be accessed through its pointer.
  enum GTERegisterAccessAction : u8
  {
    Ignore,
    Direct,
    ZeroExtend16,
    SignExtend16,
    CallHandler,
    PushFIFO,
  };

  static std::pair<u32*, GTERegisterAccessAction> GetGTERegisterPointer(u32 index, bool writing);

  // --- Per-block compilation state ---
  Block* m_block = nullptr;
  u32 m_compiler_pc = 0;
  TickCount m_cycles = 0;
  TickCount m_gte_done_cycle = 0;

  const Instruction* inst = nullptr;
  const InstructionInfo* iinfo = nullptr;
  u32 m_current_instruction_pc = 0;
  bool m_current_instruction_branch_delay_slot = false;
  bool m_branch_delay_slot_swapped = false;

  // Which parts of CPU state are stale relative to emitted code.
  bool m_dirty_pc = false;
  bool m_dirty_instruction_bits = false;
  bool m_dirty_gte_done_cycle = false;
  bool m_block_ended = false;

  // Constant-propagation state for guest registers.
  std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_valid = {};
  std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_dirty = {};
  std::array<u32, static_cast<size_t>(Reg::count)> m_constant_reg_values = {};

  std::array<HostRegAlloc, NUM_HOST_REGS> m_host_regs = {};
  u16 m_register_alloc_counter = 0;

  // Load-delay tracking (current and next slot).
  bool m_load_delay_dirty = true;
  Reg m_load_delay_register = Reg::count;
  u32 m_load_delay_value_register = 0;

  Reg m_next_load_delay_register = Reg::count;
  u32 m_next_load_delay_value_register = 0;

  // Snapshot of the compiler state, used when speculatively compiling
  // (e.g. swapped delay slots) so we can roll back.
  struct HostStateBackup
  {
    TickCount cycles;
    TickCount gte_done_cycle;
    u32 compiler_pc;
    bool dirty_pc;
    bool dirty_instruction_bits;
    bool dirty_gte_done_cycle;
    bool block_ended;
    const Instruction* inst;
    const InstructionInfo* iinfo;
    u32 current_instruction_pc;
    bool current_instruction_delay_slot;
    std::bitset<static_cast<size_t>(Reg::count)> const_regs_valid;
    std::bitset<static_cast<size_t>(Reg::count)> const_regs_dirty;
    std::array<u32, static_cast<size_t>(Reg::count)> const_regs_values;
    std::array<HostRegAlloc, NUM_HOST_REGS> host_regs;
    u16 register_alloc_counter;
    bool load_delay_dirty;
    Reg load_delay_register;
    u32 load_delay_value_register;
    Reg next_load_delay_register;
    u32 next_load_delay_value_register;
  };

  // we need two of these, one for branch delays, and another if we have an overflow in the delay slot
  std::array<HostStateBackup, 2> m_host_state_backup = {};
  u32 m_host_state_backup_count = 0;

  // PGXP memory callbacks
  static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions;
  static const std::array<const void*, 3> s_pgxp_mem_store_functions;
};

extern Compiler* g_compiler;
} // namespace CPU::NewRec
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,166 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
#include "cpu_newrec_compiler.h"
|
||||
#include <memory>
|
||||
|
||||
#include "vixl/aarch64/assembler-aarch64.h"
|
||||
|
||||
namespace CPU::NewRec {
|
||||
|
||||
class AArch64Compiler final : public Compiler
|
||||
{
|
||||
public:
|
||||
AArch64Compiler();
|
||||
~AArch64Compiler() override;
|
||||
|
||||
protected:
|
||||
void DisassembleAndLog(const void* start, u32 size) override;
|
||||
u32 GetHostInstructionCount(const void* start, u32 size) override;
|
||||
const char* GetHostRegName(u32 reg) const override;
|
||||
|
||||
const void* GetCurrentCodePointer() override;
|
||||
|
||||
void LoadHostRegWithConstant(u32 reg, u32 val) override;
|
||||
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
|
||||
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
|
||||
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
|
||||
void CopyHostReg(u32 dst, u32 src) override;
|
||||
|
||||
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
|
||||
void BeginBlock() override;
|
||||
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
|
||||
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
|
||||
void EndBlock(const std::optional<u32>& newpc) override;
|
||||
void EndBlockWithException(Exception excode) override;
|
||||
void EndAndLinkBlock(const std::optional<u32>& newpc);
|
||||
const void* EndCompile(u32* code_size, u32* far_code_size) override;
|
||||
|
||||
void Flush(u32 flags) override;
|
||||
|
||||
void Compile_Fallback() override;
|
||||
|
||||
void CheckBranchTarget(const vixl::aarch64::WRegister& pcreg);
|
||||
void Compile_jr(CompileFlags cf) override;
|
||||
void Compile_jalr(CompileFlags cf) override;
|
||||
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
|
||||
|
||||
void Compile_addi(CompileFlags cf, bool overflow);
|
||||
void Compile_addi(CompileFlags cf) override;
|
||||
void Compile_addiu(CompileFlags cf) override;
|
||||
void Compile_slti(CompileFlags cf, bool sign);
|
||||
void Compile_slti(CompileFlags cf) override;
|
||||
void Compile_sltiu(CompileFlags cf) override;
|
||||
void Compile_andi(CompileFlags cf) override;
|
||||
void Compile_ori(CompileFlags cf) override;
|
||||
void Compile_xori(CompileFlags cf) override;
|
||||
|
||||
void Compile_shift(CompileFlags cf, void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Register&, unsigned));
|
||||
void Compile_sll(CompileFlags cf) override;
|
||||
void Compile_srl(CompileFlags cf) override;
|
||||
void Compile_sra(CompileFlags cf) override;
|
||||
void Compile_variable_shift(CompileFlags cf,
|
||||
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Register&),
|
||||
void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Register&, unsigned));
|
||||
void Compile_sllv(CompileFlags cf) override;
|
||||
void Compile_srlv(CompileFlags cf) override;
|
||||
void Compile_srav(CompileFlags cf) override;
|
||||
void Compile_mult(CompileFlags cf, bool sign);
|
||||
void Compile_mult(CompileFlags cf) override;
|
||||
void Compile_multu(CompileFlags cf) override;
|
||||
void Compile_div(CompileFlags cf) override;
|
||||
void Compile_divu(CompileFlags cf) override;
|
||||
void TestOverflow(const vixl::aarch64::WRegister& result);
|
||||
void Compile_dst_op(CompileFlags cf,
|
||||
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Register&,
|
||||
const vixl::aarch64::Operand&),
|
||||
bool commutative, bool logical, bool overflow);
|
||||
void Compile_add(CompileFlags cf) override;
|
||||
void Compile_addu(CompileFlags cf) override;
|
||||
void Compile_sub(CompileFlags cf) override;
|
||||
void Compile_subu(CompileFlags cf) override;
|
||||
void Compile_and(CompileFlags cf) override;
|
||||
void Compile_or(CompileFlags cf) override;
|
||||
void Compile_xor(CompileFlags cf) override;
|
||||
void Compile_nor(CompileFlags cf) override;
|
||||
void Compile_slt(CompileFlags cf, bool sign);
|
||||
void Compile_slt(CompileFlags cf) override;
|
||||
void Compile_sltu(CompileFlags cf) override;
|
||||
|
||||
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
|
||||
vixl::aarch64::WRegister
|
||||
ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
|
||||
const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
|
||||
MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const vixl::aarch64::WRegister& sr);
|
||||
void Compile_mtc0(CompileFlags cf) override;
|
||||
void Compile_rfe(CompileFlags cf) override;
|
||||
|
||||
void Compile_mfc2(CompileFlags cf) override;
|
||||
void Compile_mtc2(CompileFlags cf) override;
|
||||
void Compile_cop2(CompileFlags cf) override;
|
||||
|
||||
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
|
||||
Reg arg3reg = Reg::count) override;
|
||||
|
||||
private:
|
||||
void EmitMov(const vixl::aarch64::WRegister& dst, u32 val);
|
||||
void EmitCall(const void* ptr, bool force_inline = false);
|
||||
|
||||
vixl::aarch64::Operand armCheckAddSubConstant(s32 val);
|
||||
vixl::aarch64::Operand armCheckAddSubConstant(u32 val);
|
||||
vixl::aarch64::Operand armCheckCompareConstant(s32 val);
|
||||
vixl::aarch64::Operand armCheckLogicalConstant(u32 val);
|
||||
|
||||
void SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
|
||||
void SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit);
|
||||
void SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero);
|
||||
void SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
|
||||
|
||||
void AssertRegOrConstS(CompileFlags cf) const;
|
||||
void AssertRegOrConstT(CompileFlags cf) const;
|
||||
vixl::aarch64::MemOperand MipsPtr(Reg r) const;
|
||||
vixl::aarch64::WRegister CFGetRegD(CompileFlags cf) const;
|
||||
vixl::aarch64::WRegister CFGetRegS(CompileFlags cf) const;
|
||||
vixl::aarch64::WRegister CFGetRegT(CompileFlags cf) const;
|
||||
vixl::aarch64::WRegister CFGetRegLO(CompileFlags cf) const;
|
||||
vixl::aarch64::WRegister CFGetRegHI(CompileFlags cf) const;
|
||||
|
||||
void MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
|
||||
void MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
|
||||
void MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg);
|
||||
|
||||
std::unique_ptr<vixl::aarch64::Assembler> m_emitter;
|
||||
std::unique_ptr<vixl::aarch64::Assembler> m_far_emitter;
|
||||
vixl::aarch64::Assembler* armAsm;
|
||||
|
||||
#ifdef VIXL_DEBUG
|
||||
std::unique_ptr<vixl::CodeBufferCheckScope> m_emitter_check;
|
||||
std::unique_ptr<vixl::CodeBufferCheckScope> m_far_emitter_check;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace CPU::NewRec
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,168 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
#include "cpu_newrec_compiler.h"
|
||||
#include <memory>
|
||||
|
||||
#include "biscuit/assembler.hpp"
|
||||
|
||||
namespace CPU::NewRec {
|
||||
|
||||
class RISCV64Compiler final : public Compiler
|
||||
{
|
||||
public:
|
||||
RISCV64Compiler();
|
||||
~RISCV64Compiler() override;
|
||||
|
||||
protected:
|
||||
void DisassembleAndLog(const void* start, u32 size) override;
|
||||
u32 GetHostInstructionCount(const void* start, u32 size) override;
|
||||
const char* GetHostRegName(u32 reg) const override;
|
||||
|
||||
const void* GetCurrentCodePointer() override;
|
||||
|
||||
void LoadHostRegWithConstant(u32 reg, u32 val) override;
|
||||
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
|
||||
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
|
||||
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
|
||||
void CopyHostReg(u32 dst, u32 src) override;
|
||||
|
||||
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
|
||||
void BeginBlock() override;
|
||||
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
|
||||
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
|
||||
void EndBlock(const std::optional<u32>& newpc) override;
|
||||
void EndBlockWithException(Exception excode) override;
|
||||
void EndAndLinkBlock(const std::optional<u32>& newpc);
|
||||
const void* EndCompile(u32* code_size, u32* far_code_size) override;
|
||||
|
||||
void Flush(u32 flags) override;
|
||||
|
||||
void Compile_Fallback() override;
|
||||
|
||||
void CheckBranchTarget(const biscuit::GPR& pcreg);
|
||||
void Compile_jr(CompileFlags cf) override;
|
||||
void Compile_jalr(CompileFlags cf) override;
|
||||
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
|
||||
|
||||
void Compile_addi(CompileFlags cf, bool overflow);
|
||||
void Compile_addi(CompileFlags cf) override;
|
||||
void Compile_addiu(CompileFlags cf) override;
|
||||
void Compile_slti(CompileFlags cf, bool sign);
|
||||
void Compile_slti(CompileFlags cf) override;
|
||||
void Compile_sltiu(CompileFlags cf) override;
|
||||
void Compile_andi(CompileFlags cf) override;
|
||||
void Compile_ori(CompileFlags cf) override;
|
||||
void Compile_xori(CompileFlags cf) override;
|
||||
|
||||
void Compile_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
|
||||
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
|
||||
void Compile_sll(CompileFlags cf) override;
|
||||
void Compile_srl(CompileFlags cf) override;
|
||||
void Compile_sra(CompileFlags cf) override;
|
||||
void Compile_variable_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
|
||||
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
|
||||
void Compile_sllv(CompileFlags cf) override;
|
||||
void Compile_srlv(CompileFlags cf) override;
|
||||
void Compile_srav(CompileFlags cf) override;
|
||||
void Compile_mult(CompileFlags cf, bool sign);
|
||||
void Compile_mult(CompileFlags cf) override;
|
||||
void Compile_multu(CompileFlags cf) override;
|
||||
void Compile_div(CompileFlags cf) override;
|
||||
void Compile_divu(CompileFlags cf) override;
|
||||
void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard);
|
||||
void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
|
||||
void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
|
||||
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative,
|
||||
bool overflow);
|
||||
void Compile_add(CompileFlags cf) override;
|
||||
void Compile_addu(CompileFlags cf) override;
|
||||
void Compile_sub(CompileFlags cf) override;
|
||||
void Compile_subu(CompileFlags cf) override;
|
||||
void Compile_and(CompileFlags cf) override;
|
||||
void Compile_or(CompileFlags cf) override;
|
||||
void Compile_xor(CompileFlags cf) override;
|
||||
void Compile_nor(CompileFlags cf) override;
|
||||
void Compile_slt(CompileFlags cf, bool sign);
|
||||
void Compile_slt(CompileFlags cf) override;
|
||||
void Compile_sltu(CompileFlags cf) override;
|
||||
|
||||
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
|
||||
biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
|
||||
const std::optional<const biscuit::GPR>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const biscuit::GPR& sr);
|
||||
void Compile_mtc0(CompileFlags cf) override;
|
||||
void Compile_rfe(CompileFlags cf) override;
|
||||
|
||||
void Compile_mfc2(CompileFlags cf) override;
|
||||
void Compile_mtc2(CompileFlags cf) override;
|
||||
void Compile_cop2(CompileFlags cf) override;
|
||||
|
||||
private:
|
||||
void EmitMov(const biscuit::GPR& dst, u32 val);
|
||||
void EmitCall(const void* ptr);
|
||||
|
||||
void SwitchToFarCode(bool emit_jump,
|
||||
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) = nullptr,
|
||||
const biscuit::GPR& rs1 = biscuit::zero, const biscuit::GPR& rs2 = biscuit::zero);
|
||||
void SwitchToNearCode(bool emit_jump);
|
||||
|
||||
void AssertRegOrConstS(CompileFlags cf) const;
|
||||
void AssertRegOrConstT(CompileFlags cf) const;
|
||||
// vixl::aarch64::MemOperand MipsPtr(Reg r) const;
|
||||
|
||||
void SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
|
||||
void (biscuit::Assembler::*iop)(biscuit::GPR, biscuit::GPR, u32),
|
||||
void (biscuit::Assembler::*rop)(biscuit::GPR, biscuit::GPR, biscuit::GPR));
|
||||
|
||||
void SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
void SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
|
||||
|
||||
void EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
void EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
void EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
void EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
void EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
void EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
|
||||
|
||||
biscuit::GPR CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg);
|
||||
biscuit::GPR CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg);
|
||||
|
||||
biscuit::GPR CFGetRegD(CompileFlags cf) const;
|
||||
biscuit::GPR CFGetRegS(CompileFlags cf) const;
|
||||
biscuit::GPR CFGetRegT(CompileFlags cf) const;
|
||||
biscuit::GPR CFGetRegLO(CompileFlags cf) const;
|
||||
biscuit::GPR CFGetRegHI(CompileFlags cf) const;
|
||||
|
||||
void MoveSToReg(const biscuit::GPR& dst, CompileFlags cf);
|
||||
void MoveTToReg(const biscuit::GPR& dst, CompileFlags cf);
|
||||
|
||||
std::unique_ptr<biscuit::Assembler> m_emitter;
|
||||
std::unique_ptr<biscuit::Assembler> m_far_emitter;
|
||||
biscuit::Assembler* rvAsm;
|
||||
};
|
||||
|
||||
} // namespace CPU::NewRec
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,149 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
#include "cpu_newrec_compiler.h"
|
||||
#include <initializer_list>
|
||||
#include <memory>
|
||||
|
||||
// We need to include windows.h before xbyak does..
|
||||
#ifdef _WIN32
|
||||
#include "common/windows_headers.h"
|
||||
#endif
|
||||
|
||||
#define XBYAK_NO_OP_NAMES 1
|
||||
#include "xbyak.h"
|
||||
|
||||
namespace CPU::NewRec {
|
||||
|
||||
class X64Compiler final : public Compiler
|
||||
{
|
||||
public:
|
||||
X64Compiler();
|
||||
~X64Compiler() override;
|
||||
|
||||
protected:
|
||||
void DisassembleAndLog(const void* start, u32 size) override;
|
||||
u32 GetHostInstructionCount(const void* start, u32 size) override;
|
||||
const char* GetHostRegName(u32 reg) const override;
|
||||
|
||||
const void* GetCurrentCodePointer() override;
|
||||
|
||||
void LoadHostRegWithConstant(u32 reg, u32 val) override;
|
||||
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
|
||||
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
|
||||
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
|
||||
void CopyHostReg(u32 dst, u32 src) override;
|
||||
|
||||
void Reset(Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
|
||||
void BeginBlock() override;
|
||||
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
|
||||
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
|
||||
void EndBlock(const std::optional<u32>& newpc) override;
|
||||
void EndBlockWithException(Exception excode) override;
|
||||
void EndAndLinkBlock(const std::optional<u32>& newpc);
|
||||
const void* EndCompile(u32* code_size, u32* far_code_size) override;
|
||||
|
||||
void Flush(u32 flags) override;
|
||||
|
||||
void Compile_Fallback() override;
|
||||
|
||||
void CheckBranchTarget(const Xbyak::Reg32& pcreg);
|
||||
void Compile_jr(CompileFlags cf) override;
|
||||
void Compile_jalr(CompileFlags cf) override;
|
||||
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
|
||||
|
||||
void Compile_addi(CompileFlags cf) override;
|
||||
void Compile_addiu(CompileFlags cf) override;
|
||||
void Compile_slti(CompileFlags cf, bool sign);
|
||||
void Compile_slti(CompileFlags cf) override;
|
||||
void Compile_sltiu(CompileFlags cf) override;
|
||||
void Compile_andi(CompileFlags cf) override;
|
||||
void Compile_ori(CompileFlags cf) override;
|
||||
void Compile_xori(CompileFlags cf) override;
|
||||
|
||||
void Compile_sll(CompileFlags cf) override;
|
||||
void Compile_srl(CompileFlags cf) override;
|
||||
void Compile_sra(CompileFlags cf) override;
|
||||
void Compile_variable_shift(CompileFlags cf,
|
||||
void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Reg8&),
|
||||
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int));
|
||||
void Compile_sllv(CompileFlags cf) override;
|
||||
void Compile_srlv(CompileFlags cf) override;
|
||||
void Compile_srav(CompileFlags cf) override;
|
||||
void Compile_mult(CompileFlags cf, bool sign);
|
||||
void Compile_mult(CompileFlags cf) override;
|
||||
void Compile_multu(CompileFlags cf) override;
|
||||
void Compile_div(CompileFlags cf) override;
|
||||
void Compile_divu(CompileFlags cf) override;
|
||||
void TestOverflow(const Xbyak::Reg32& result);
|
||||
void Compile_dst_op(CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&),
|
||||
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), bool commutative,
|
||||
bool overflow);
|
||||
void Compile_add(CompileFlags cf) override;
|
||||
void Compile_addu(CompileFlags cf) override;
|
||||
void Compile_sub(CompileFlags cf) override;
|
||||
void Compile_subu(CompileFlags cf) override;
|
||||
void Compile_and(CompileFlags cf) override;
|
||||
void Compile_or(CompileFlags cf) override;
|
||||
void Compile_xor(CompileFlags cf) override;
|
||||
void Compile_nor(CompileFlags cf) override;
|
||||
void Compile_slt(CompileFlags cf, bool sign);
|
||||
void Compile_slt(CompileFlags cf) override;
|
||||
void Compile_sltu(CompileFlags cf) override;
|
||||
|
||||
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
|
||||
Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
|
||||
const std::optional<const Xbyak::Reg32>& reg = std::nullopt);
|
||||
template<typename RegAllocFn>
|
||||
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
|
||||
const RegAllocFn& dst_reg_alloc);
|
||||
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
|
||||
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
|
||||
const std::optional<VirtualMemoryAddress>& address) override;
|
||||
|
||||
void TestInterrupts(const Xbyak::Reg32& sr);
|
||||
void Compile_mtc0(CompileFlags cf) override;
|
||||
void Compile_rfe(CompileFlags cf) override;
|
||||
|
||||
void Compile_mfc2(CompileFlags cf) override;
|
||||
void Compile_mtc2(CompileFlags cf) override;
|
||||
void Compile_cop2(CompileFlags cf) override;
|
||||
|
||||
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
|
||||
Reg arg3reg = Reg::count) override;
|
||||
|
||||
private:
|
||||
void SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
|
||||
void SwitchToNearCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
|
||||
|
||||
Xbyak::Address MipsPtr(Reg r) const;
|
||||
Xbyak::Reg32 CFGetRegD(CompileFlags cf) const;
|
||||
Xbyak::Reg32 CFGetRegS(CompileFlags cf) const;
|
||||
Xbyak::Reg32 CFGetRegT(CompileFlags cf) const;
|
||||
Xbyak::Reg32 CFGetRegLO(CompileFlags cf) const;
|
||||
Xbyak::Reg32 CFGetRegHI(CompileFlags cf) const;
|
||||
|
||||
Xbyak::Reg32 MoveSToD(CompileFlags cf);
|
||||
Xbyak::Reg32 MoveSToT(CompileFlags cf);
|
||||
Xbyak::Reg32 MoveTToD(CompileFlags cf);
|
||||
void MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf);
|
||||
void MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf);
|
||||
void MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg);
|
||||
|
||||
std::unique_ptr<Xbyak::CodeGenerator> m_emitter;
|
||||
std::unique_ptr<Xbyak::CodeGenerator> m_far_emitter;
|
||||
Xbyak::CodeGenerator* cg;
|
||||
};
|
||||
|
||||
} // namespace CPU::NewRec
|
|
@ -0,0 +1,164 @@
|
|||
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
#include "bus.h"
|
||||
#include "cpu_core_private.h"
|
||||
#include "cpu_newrec.h"
|
||||
#include "cpu_types.h"
|
||||
#include "types.h"
|
||||
#include "util/jit_code_buffer.h"
|
||||
#include <unordered_map>
|
||||
|
||||
namespace CPU::NewRec {
|
||||
enum : u32
|
||||
{
|
||||
LUT_TABLE_COUNT = 0x10000,
|
||||
LUT_TABLE_SIZE = 0x10000 / sizeof(u32), // 16384, one for each PC
|
||||
LUT_TABLE_SHIFT = 16,
|
||||
|
||||
MAX_BLOCK_EXIT_LINKS = 2,
|
||||
};
|
||||
|
||||
using CodeLUT = const void**;
|
||||
using CodeLUTArray = std::array<CodeLUT, LUT_TABLE_COUNT>;
|
||||
using BlockLinkMap = std::unordered_multimap<u32, void*>; // TODO: try ordered?
|
||||
|
||||
enum RegInfoFlags : u8
|
||||
{
|
||||
RI_LIVE = (1 << 0),
|
||||
RI_USED = (1 << 1),
|
||||
RI_LASTUSE = (1 << 2),
|
||||
};
|
||||
|
||||
struct InstructionInfo
|
||||
{
|
||||
u8 reg_flags[static_cast<u8>(Reg::count)];
|
||||
// Reg write_reg[3];
|
||||
Reg read_reg[3];
|
||||
|
||||
// If unset, values which are not live will not be written back to memory.
|
||||
// Tends to break stuff at the moment.
|
||||
static constexpr bool WRITE_DEAD_VALUES = true;
|
||||
|
||||
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
|
||||
/// In other words, the register is worth keeping in a host register/caching it.
|
||||
inline bool UsedTest(Reg reg) const { return (reg_flags[static_cast<u8>(reg)] & (RI_USED | RI_LASTUSE)) == RI_USED; }
|
||||
|
||||
/// Returns true if the value should be computed/written back.
|
||||
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
||||
inline bool LiveTest(Reg reg) const
|
||||
{
|
||||
return WRITE_DEAD_VALUES || ((reg_flags[static_cast<u8>(reg)] & RI_LIVE) != 0);
|
||||
}
|
||||
|
||||
/// Returns true if the register can be renamed into another.
|
||||
inline bool RenameTest(Reg reg) const { return (reg == Reg::zero || !UsedTest(reg) || !LiveTest(reg)); }
|
||||
|
||||
/// Returns true if this instruction reads this register.
|
||||
inline bool ReadsReg(Reg reg) const { return (read_reg[0] == reg || read_reg[1] == reg || read_reg[2] == reg); }
|
||||
};
|
||||
|
||||
enum class BlockState : u8
|
||||
{
|
||||
Valid,
|
||||
Invalidated,
|
||||
NeedsRecompile,
|
||||
};
|
||||
|
||||
struct Block
|
||||
{
|
||||
u32 pc;
|
||||
u32 size; // in guest instructions
|
||||
const void* host_code;
|
||||
|
||||
// links to previous/next block within page
|
||||
Block* next_block_in_page;
|
||||
|
||||
BlockLinkMap::iterator exit_links[MAX_BLOCK_EXIT_LINKS];
|
||||
u32 num_exit_links;
|
||||
|
||||
BlockState state;
|
||||
|
||||
// followed by Instruction * size, InstructionRegInfo * size
|
||||
const Instruction* Instructions() const { return reinterpret_cast<const Instruction*>(this + 1); }
|
||||
Instruction* Instructions() { return reinterpret_cast<Instruction*>(this + 1); }
|
||||
|
||||
const InstructionInfo* InstructionsInfo() const
|
||||
{
|
||||
return reinterpret_cast<const InstructionInfo*>(Instructions() + size);
|
||||
}
|
||||
InstructionInfo* InstructionsInfo() { return reinterpret_cast<InstructionInfo*>(Instructions() + size); }
|
||||
};
|
||||
|
||||
using BlockLUTArray = std::array<Block**, LUT_TABLE_COUNT>;
|
||||
|
||||
struct LoadstoreBackpatchInfo
|
||||
{
|
||||
u32 guest_pc;
|
||||
u32 gpr_bitmask;
|
||||
u16 cycles;
|
||||
u16 address_register : 5;
|
||||
u16 data_register : 5;
|
||||
u16 size : 2;
|
||||
u16 is_signed : 1;
|
||||
u16 is_load : 1;
|
||||
u8 code_size;
|
||||
u8 fault_count;
|
||||
|
||||
MemoryAccessSize AccessSize() const { return static_cast<MemoryAccessSize>(size); }
|
||||
u32 AccessSizeInBytes() const { return 1u << size; }
|
||||
};
|
||||
static_assert(sizeof(LoadstoreBackpatchInfo) == 16);
|
||||
|
||||
static inline bool BlockInRAM(VirtualMemoryAddress pc)
|
||||
{
|
||||
return VirtualAddressToPhysical(pc) < Bus::g_ram_size;
|
||||
}
|
||||
|
||||
enum class PageProtectionMode : u8
|
||||
{
|
||||
WriteProtected,
|
||||
ManualCheck,
|
||||
Unprotected,
|
||||
};
|
||||
struct PageProtectionInfo
|
||||
{
|
||||
Block* first_block_in_page;
|
||||
Block* last_block_in_page;
|
||||
|
||||
PageProtectionMode mode;
|
||||
u16 invalidate_count;
|
||||
u32 invalidate_frame;
|
||||
};
|
||||
static_assert(sizeof(PageProtectionInfo) == 24);
|
||||
|
||||
Block* LookupBlock(u32 pc);
|
||||
Block* CreateBlock(u32 pc);
|
||||
bool RevalidateBlock(Block* block);
|
||||
void CompileOrRevalidateBlock(u32 start_pc);
|
||||
void DiscardAndRecompileBlock(u32 start_pc);
|
||||
u32 CreateBlockLink(Block* from_block, void* code, u32 newpc);
|
||||
PageProtectionMode GetProtectionModeForBlock(Block* block);
|
||||
|
||||
u32 CompileASMFunctions(u8* code, u32 code_size);
|
||||
u32 EmitJump(void* code, const void* dst, bool flush_icache);
|
||||
|
||||
void SetFastMap(u32 pc, const void* function);
|
||||
|
||||
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, TickCount cycles, u32 gpr_bitmask,
|
||||
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load);
|
||||
u32 BackpatchLoadStore(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, TickCount cycles_to_add,
|
||||
TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register,
|
||||
MemoryAccessSize size, bool is_signed, bool is_load);
|
||||
|
||||
extern CodeLUTArray g_fast_map;
|
||||
|
||||
extern NORETURN_FUNCTION_POINTER void(*g_enter_recompiler)();
|
||||
extern const void* g_compile_or_revalidate_block;
|
||||
extern const void* g_check_events_and_dispatch;
|
||||
extern const void* g_dispatcher;
|
||||
extern const void* g_interpret_block;
|
||||
extern const void* g_discard_and_recompile_block;
|
||||
|
||||
} // namespace CPU::NewRec
|
|
@ -979,8 +979,10 @@ void CodeGenerator::BlockPrologue()
|
|||
EmitFunctionCall(nullptr, &Thunks::LogPC, Value::FromConstantU32(m_pc));
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
if (m_block->uncached_fetch_ticks > 0 || m_block->icache_line_count > 0)
|
||||
EmitICacheCheckAndUpdate();
|
||||
#endif
|
||||
|
||||
// we don't know the state of the last block, so assume load delays might be in progress
|
||||
// TODO: Pull load delay into register cache
|
||||
|
@ -1115,7 +1117,9 @@ void CodeGenerator::AddPendingCycles(bool commit)
|
|||
|
||||
if (commit)
|
||||
{
|
||||
m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
|
||||
// m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
|
||||
m_gte_done_cycle = 0;
|
||||
m_gte_busy_cycles_dirty = true;
|
||||
m_delayed_cycles_add = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@ void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
|
|||
Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size)
|
||||
{
|
||||
#if 0
|
||||
// TODO: re-enable once implemented in new-rec
|
||||
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
|
||||
{
|
||||
TickCount read_ticks;
|
||||
|
@ -59,6 +61,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
return result;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Value result = m_register_cache.AllocateScratch(HostPointerSize);
|
||||
|
||||
|
@ -118,6 +121,8 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address,
|
||||
const SpeculativeValue& address_spec, RegSize size, const Value& value)
|
||||
{
|
||||
#if 0
|
||||
// TODO: re-enable once implemented in new-rec
|
||||
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
|
||||
{
|
||||
void* ptr = GetDirectWriteMemoryPointer(
|
||||
|
@ -134,6 +139,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
|
|||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const bool use_fastmem =
|
||||
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) && !SpeculativeIsCacheIsolated();
|
||||
|
|
|
@ -2709,6 +2709,8 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
|
|||
|
||||
void CodeGenerator::EmitICacheCheckAndUpdate()
|
||||
{
|
||||
// TODO: reenable me
|
||||
return;
|
||||
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
|
||||
{
|
||||
m_emit->add(m_emit->dword[GetCPUPtrReg() + offsetof(State, pending_ticks)],
|
||||
|
|
|
@ -369,6 +369,11 @@ void ImGuiManager::DrawPerformanceOverlay()
|
|||
text.append_fmt("{}{}", first ? "" : "/", "CI");
|
||||
first = false;
|
||||
}
|
||||
else if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
|
||||
{
|
||||
text.append_fmt("{}{}", first ? "" : "/", "NR");
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (g_settings.cpu_recompiler_icache)
|
||||
|
|
|
@ -662,17 +662,6 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages)
|
|||
g_settings.rewind_enable = false;
|
||||
}
|
||||
|
||||
if (g_settings.IsRunaheadEnabled())
|
||||
{
|
||||
// Block linking is good for performance, but hurts when regularly loading (i.e. runahead), since everything has to
|
||||
// be unlinked. Which would be thousands of blocks.
|
||||
if (g_settings.cpu_recompiler_block_linking)
|
||||
{
|
||||
Log_WarningPrintf("Disabling block linking due to runahead.");
|
||||
g_settings.cpu_recompiler_block_linking = false;
|
||||
}
|
||||
}
|
||||
|
||||
// if challenge mode is enabled, disable things like rewind since they use save states
|
||||
if (Achievements::IsHardcoreModeActive())
|
||||
{
|
||||
|
@ -834,11 +823,13 @@ const char* Settings::GetDiscRegionDisplayName(DiscRegion region)
|
|||
return Host::TranslateToCString("DiscRegion", s_disc_region_display_names[static_cast<int>(region)]);
|
||||
}
|
||||
|
||||
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler"};
|
||||
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler",
|
||||
"NewRec"};
|
||||
static constexpr const std::array s_cpu_execution_mode_display_names = {
|
||||
TRANSLATE_NOOP("CPUExecutionMode", "Interpreter (Slowest)"),
|
||||
TRANSLATE_NOOP("CPUExecutionMode", "Cached Interpreter (Faster)"),
|
||||
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)")};
|
||||
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)"),
|
||||
TRANSLATE_NOOP("CPUExecutionMode", "New Recompiler (Experimental)")};
|
||||
|
||||
std::optional<CPUExecutionMode> Settings::ParseCPUExecutionMode(const char* str)
|
||||
{
|
||||
|
|
|
@ -255,7 +255,12 @@ struct Settings
|
|||
bool log_to_file = false;
|
||||
|
||||
ALWAYS_INLINE bool IsUsingCodeCache() const { return (cpu_execution_mode != CPUExecutionMode::Interpreter); }
|
||||
ALWAYS_INLINE bool IsUsingAnyRecompiler() const
|
||||
{
|
||||
return (cpu_execution_mode == CPUExecutionMode::Recompiler || cpu_execution_mode == CPUExecutionMode::NewRec);
|
||||
}
|
||||
ALWAYS_INLINE bool IsUsingRecompiler() const { return (cpu_execution_mode == CPUExecutionMode::Recompiler); }
|
||||
ALWAYS_INLINE bool IsUsingNewRec() const { return (cpu_execution_mode == CPUExecutionMode::NewRec); }
|
||||
ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); }
|
||||
ALWAYS_INLINE bool IsRunaheadEnabled() const { return (runahead_frames > 0); }
|
||||
|
||||
|
@ -277,8 +282,7 @@ struct Settings
|
|||
|
||||
ALWAYS_INLINE bool IsUsingFastmem() const
|
||||
{
|
||||
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler &&
|
||||
!cpu_recompiler_memory_exceptions);
|
||||
return (cpu_fastmem_mode != CPUFastmemMode::Disabled && IsUsingAnyRecompiler() && !cpu_recompiler_memory_exceptions);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE s32 GetAudioOutputVolume(bool fast_forwarding) const
|
||||
|
@ -422,7 +426,7 @@ struct Settings
|
|||
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
|
||||
static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f;
|
||||
|
||||
#ifdef ENABLE_RECOMPILER
|
||||
#if defined(ENABLE_RECOMPILER)
|
||||
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
|
||||
|
||||
// LUT still ends up faster on Apple Silicon for now, because of 16K pages.
|
||||
|
@ -431,6 +435,9 @@ struct Settings
|
|||
#else
|
||||
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT;
|
||||
#endif
|
||||
#elif defined(ENABLE_NEWREC)
|
||||
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
|
||||
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap;
|
||||
#else
|
||||
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
|
||||
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled;
|
||||
|
|
|
@ -3516,7 +3516,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
|
|||
CPU::ClearICache();
|
||||
}
|
||||
|
||||
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
|
||||
if (g_settings.IsUsingAnyRecompiler() &&
|
||||
(g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions ||
|
||||
g_settings.cpu_recompiler_block_linking != old_settings.cpu_recompiler_block_linking ||
|
||||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache ||
|
||||
|
|
|
@ -46,6 +46,7 @@ enum class CPUExecutionMode : u8
|
|||
Interpreter,
|
||||
CachedInterpreter,
|
||||
Recompiler,
|
||||
NewRec,
|
||||
Count
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue