CPU: Add new experimental recompiler

This commit is contained in:
Stenzek 2023-10-04 00:39:18 +10:00
parent c179473c2b
commit 9501439d6b
No known key found for this signature in database
23 changed files with 10228 additions and 9 deletions

View File

@ -23,6 +23,7 @@ endif()
# Renderer options.
option(ENABLE_OPENGL "Build with OpenGL renderer" ON)
option(ENABLE_VULKAN "Build with Vulkan renderer" ON)
# Experimental second-generation dynarec. Per the description it is the only
# recompiler path available on RISC-V (the old recompiler has no riscv backend).
option(ENABLE_NEWREC "Build with experimental new dynarec (needed for RISC-V)" ON)
# Global options.
if(NOT ANDROID)
@ -171,6 +172,9 @@ elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm" OR "${CMAKE_SYSTEM_PROCESSOR}"
endif()
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "riscv64")
set(CPU_ARCH "riscv64")
# Presumably the toolchain does not link libatomic automatically on riscv64,
# so it is added explicitly here. TODO: consider inlining atomics instead.
# NOTE(review): link_libraries() is directory-scoped and leaks to every target
# below this point; prefer target_link_libraries() on the targets that need it.
link_libraries("-latomic")
else()
message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

View File

@ -121,6 +121,11 @@ set(RECOMPILER_SRCS
cpu_recompiler_types.h
)
# Architecture-independent sources shared by every new-rec backend; the
# matching per-arch backend files are appended below when ENABLE_NEWREC is set.
set(NEWREC_SOURCES
cpu_newrec_compiler.cpp
cpu_newrec_compiler.h
)
target_precompile_headers(core PRIVATE "pch.h")
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
@ -134,6 +139,15 @@ if(${CPU_ARCH} STREQUAL "x64")
cpu_recompiler_code_generator_x64.cpp
)
message("Building x64 recompiler")
if(ENABLE_NEWREC)
# PUBLIC so that targets linking against core also see ENABLE_NEWREC=1.
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_x64.cpp
cpu_newrec_compiler_x64.h
)
# NOTE(review): prefer message(STATUS ...) for progress output; bare message()
# writes to stderr as a notice.
message("Building x64 newrec")
endif()
elseif(${CPU_ARCH} STREQUAL "aarch32")
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS}
@ -148,6 +162,25 @@ elseif(${CPU_ARCH} STREQUAL "aarch64")
)
target_link_libraries(core PUBLIC vixl)
message("Building AArch64 recompiler")
if(ENABLE_NEWREC)
# Same wiring as x64 above: shared new-rec sources plus the aarch64 backend.
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_aarch64.cpp
cpu_newrec_compiler_aarch64.h
)
message("Building AArch64 newrec")
endif()
elseif(${CPU_ARCH} STREQUAL "riscv64")
# riscv64 has no old-recompiler branch in this file: fastmem + new-rec is the
# only JIT path wired up for this architecture.
target_compile_definitions(core PUBLIC "ENABLE_MMAP_FASTMEM=1")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_riscv64.cpp
cpu_newrec_compiler_riscv64.h
)
# biscuit = RISC-V code emitter, riscv-disas = disassembler for debugging.
target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas)
message("Building RISC-V 64-bit newrec")
endif()
# NOTE(review): if ENABLE_NEWREC is OFF on riscv64, no recompiler is built at
# all and ENABLE_MMAP_FASTMEM is still defined — confirm this is intended.
else()
message("Not building recompiler")
endif()

View File

@ -8,6 +8,7 @@
<PreprocessorDefinitions Condition="('$(Platform)'!='ARM64')">ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_NEWREC=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\xxhash\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\rapidjson\include;$(SolutionDir)dep\discord-rpc\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Platform)'!='ARM64'">%(AdditionalIncludeDirectories);$(SolutionDir)dep\rainterface</AdditionalIncludeDirectories>

View File

@ -13,6 +13,13 @@
<ClCompile Include="cpu_core.cpp" />
<ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_newrec_compiler_x64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_recompiler_code_generator.cpp">
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
</ClCompile>
@ -90,6 +97,13 @@
<ClInclude Include="cpu_core_private.h" />
<ClInclude Include="cpu_disasm.h" />
<ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_newrec_compiler_x64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_recompiler_code_generator.h">
<ExcludedFromBuild Condition="'$(Platform)'=='Win32'">true</ExcludedFromBuild>
</ClInclude>

View File

@ -60,6 +60,9 @@
<ClCompile Include="hotkeys.cpp" />
<ClCompile Include="gpu_shadergen.cpp" />
<ClCompile Include="pch.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_x64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="types.h" />
@ -125,5 +128,8 @@
<ClInclude Include="gpu_shadergen.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="cpu_code_cache_private.h" />
<ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_compiler_x64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h" />
</ItemGroup>
</Project>

View File

@ -21,6 +21,10 @@ Log_SetChannel(CPU::CodeCache);
#include "cpu_recompiler_code_generator.h"
#endif
#ifdef ENABLE_NEWREC
#include "cpu_newrec_compiler.h"
#endif
#include <unordered_set>
#include <zlib.h>
@ -144,7 +148,8 @@ static u32 s_total_host_instructions_emitted = 0;
bool CPU::CodeCache::IsUsingAnyRecompiler()
{
#ifdef ENABLE_RECOMPILER_SUPPORT
return g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler;
return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
#else
return false;
#endif
@ -498,8 +503,8 @@ CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructio
return block;
}
// TODO: Only used by NewRec for now, don't waste time filling it.
if constexpr (false)
// Old rec doesn't use backprop info, don't waste time filling it.
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
FillBlockRegInfo(block);
// add it to the tracking list for its page
@ -1419,6 +1424,10 @@ bool CPU::CodeCache::CompileBlock(Block* block)
host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
}
#endif
#ifdef ENABLE_NEWREC
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
host_code = NewRec::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
#endif
s_code_buffer.WriteProtect(true);
@ -1570,6 +1579,10 @@ void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchI
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info);
#endif
#ifdef ENABLE_NEWREC
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
NewRec::BackpatchLoadStore(host_pc, info);
#endif
s_code_buffer.WriteProtect(true);
}

View File

@ -227,7 +227,7 @@ void InterpretUncachedBlock();
void LogCurrentState();
#if defined(ENABLE_RECOMPILER)
#if defined(ENABLE_RECOMPILER) || defined(ENABLE_NEWREC)
#define ENABLE_RECOMPILER_SUPPORT 1
#if defined(_DEBUG) || false

View File

@ -2231,6 +2231,7 @@ void CPU::Execute()
{
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
case CPUExecutionMode::NewRec:
CodeCache::Execute();
break;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,465 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_code_cache_private.h"
#include "cpu_recompiler_types.h"
#include "cpu_types.h"
#include <array>
#include <bitset>
#include <optional>
#include <utility>
#include <vector>
namespace CPU::NewRec {
// Global options
static constexpr bool EMULATE_LOAD_DELAYS = true;
static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;
// Arch-specific options
#if defined(CPU_ARCH_X64)
static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = true;
#elif defined(CPU_ARCH_ARM64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#elif defined(CPU_ARCH_RISCV64)
static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#endif
// TODO: Get rid of the virtuals... somehow.
class Compiler
{
public:
Compiler();
virtual ~Compiler();
const void* CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size);
protected:
enum FlushFlags : u32
{
FLUSH_FLUSH_MIPS_REGISTERS = (1 << 0),
FLUSH_INVALIDATE_MIPS_REGISTERS = (1 << 1),
FLUSH_FREE_CALLER_SAVED_REGISTERS = (1 << 2),
FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS = (1 << 3),
FLUSH_FREE_ALL_REGISTERS = (1 << 4),
FLUSH_PC = (1 << 5),
FLUSH_INSTRUCTION_BITS = (1 << 6),
FLUSH_CYCLES = (1 << 7),
FLUSH_LOAD_DELAY = (1 << 8),
FLUSH_LOAD_DELAY_FROM_STATE = (1 << 9),
FLUSH_GTE_DONE_CYCLE = (1 << 10),
FLUSH_GTE_STALL_FROM_STATE = (1 << 11),
FLUSH_FOR_C_CALL = (FLUSH_FREE_CALLER_SAVED_REGISTERS),
FLUSH_FOR_LOADSTORE = (FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_CYCLES),
FLUSH_FOR_BRANCH = (FLUSH_FLUSH_MIPS_REGISTERS),
FLUSH_FOR_EXCEPTION =
(FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE), // GTE cycles needed because it stalls when a GTE instruction is next.
FLUSH_FOR_INTERPRETER =
(FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_INVALIDATE_MIPS_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_PC |
FLUSH_CYCLES | FLUSH_INSTRUCTION_BITS | FLUSH_LOAD_DELAY | FLUSH_GTE_DONE_CYCLE),
FLUSH_END_BLOCK = 0xFFFFFFFFu & ~(FLUSH_PC | FLUSH_CYCLES | FLUSH_GTE_DONE_CYCLE | FLUSH_INSTRUCTION_BITS |
FLUSH_GTE_STALL_FROM_STATE),
};
union CompileFlags
{
struct
{
u32 const_s : 1; // S is constant
u32 const_t : 1; // T is constant
u32 const_lo : 1; // LO is constant
u32 const_hi : 1; // HI is constant
u32 valid_host_d : 1; // D is valid in host register
u32 valid_host_s : 1; // S is valid in host register
u32 valid_host_t : 1; // T is valid in host register
u32 valid_host_lo : 1; // LO is valid in host register
u32 valid_host_hi : 1; // HI is valid in host register
u32 host_d : 5; // D host register
u32 host_s : 5; // S host register
u32 host_t : 5; // T host register
u32 host_lo : 5; // LO host register
u32 delay_slot_swapped : 1;
u32 pad1 : 2; // 28..31
u32 host_hi : 5; // HI host register
u32 mips_s : 5; // S guest register
u32 mips_t : 5; // T guest register
u32 pad2 : 15; // 32 bits
};
u64 bits;
ALWAYS_INLINE Reg MipsS() const { return static_cast<Reg>(mips_s); }
ALWAYS_INLINE Reg MipsT() const { return static_cast<Reg>(mips_t); }
};
static_assert(sizeof(CompileFlags) == sizeof(u64));
enum TemplateFlag : u32
{
TF_READS_S = (1 << 0),
TF_READS_T = (1 << 1),
TF_READS_LO = (1 << 2),
TF_READS_HI = (1 << 3),
TF_WRITES_D = (1 << 4),
TF_WRITES_T = (1 << 5),
TF_WRITES_LO = (1 << 6),
TF_WRITES_HI = (1 << 7),
TF_COMMUTATIVE = (1 << 8), // S op T == T op S
TF_CAN_OVERFLOW = (1 << 9),
// TF_NORENAME = // TODO
TF_LOAD_DELAY = (1 << 10),
TF_GTE_STALL = (1 << 11),
TF_NO_NOP = (1 << 12),
TF_NEEDS_REG_S = (1 << 13),
TF_NEEDS_REG_T = (1 << 14),
TF_CAN_SWAP_DELAY_SLOT = (1 << 15),
TF_RENAME_WITH_ZERO_T = (1 << 16), // add commutative for S as well
TF_RENAME_WITH_ZERO_IMM = (1 << 17),
TF_PGXP_WITHOUT_CPU = (1 << 18),
};
enum HostRegFlags : u8
{
HR_ALLOCATED = (1 << 0),
HR_NEEDED = (1 << 1),
HR_MODE_READ = (1 << 2), // valid
HR_MODE_WRITE = (1 << 3), // dirty
HR_USABLE = (1 << 7),
HR_CALLEE_SAVED = (1 << 6),
ALLOWED_HR_FLAGS = HR_MODE_READ | HR_MODE_WRITE,
IMMUTABLE_HR_FLAGS = HR_USABLE | HR_CALLEE_SAVED,
};
enum HostRegAllocType : u8
{
HR_TYPE_TEMP,
HR_TYPE_CPU_REG,
HR_TYPE_PC_WRITEBACK,
HR_TYPE_LOAD_DELAY_VALUE,
HR_TYPE_NEXT_LOAD_DELAY_VALUE,
};
struct HostRegAlloc
{
u8 flags;
HostRegAllocType type;
Reg reg;
u16 counter;
};
enum class BranchCondition : u8
{
Equal,
NotEqual,
GreaterThanZero,
GreaterEqualZero,
LessThanZero,
LessEqualZero,
};
ALWAYS_INLINE bool HasConstantReg(Reg r) const { return m_constant_regs_valid.test(static_cast<u32>(r)); }
ALWAYS_INLINE bool HasDirtyConstantReg(Reg r) const { return m_constant_regs_dirty.test(static_cast<u32>(r)); }
ALWAYS_INLINE bool HasConstantRegValue(Reg r, u32 val) const
{
return m_constant_regs_valid.test(static_cast<u32>(r)) && m_constant_reg_values[static_cast<u32>(r)] == val;
}
ALWAYS_INLINE u32 GetConstantRegU32(Reg r) const { return m_constant_reg_values[static_cast<u32>(r)]; }
ALWAYS_INLINE s32 GetConstantRegS32(Reg r) const
{
return static_cast<s32>(m_constant_reg_values[static_cast<u32>(r)]);
}
void SetConstantReg(Reg r, u32 v);
void ClearConstantReg(Reg r);
void FlushConstantReg(Reg r);
void FlushConstantRegs(bool invalidate);
Reg MipsD() const;
u32 GetConditionalBranchTarget(CompileFlags cf) const;
u32 GetBranchReturnAddress(CompileFlags cf) const;
bool TrySwapDelaySlot(Reg rs = Reg::zero, Reg rt = Reg::zero, Reg rd = Reg::zero);
void SetCompilerPC(u32 newpc);
virtual const void* GetCurrentCodePointer() = 0;
virtual void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
u32 far_code_space);
virtual void BeginBlock();
virtual void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) = 0;
virtual void GenerateICacheCheckAndUpdate() = 0;
virtual void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) = 0;
virtual void EndBlock(const std::optional<u32>& newpc, bool do_event_test) = 0;
virtual void EndBlockWithException(Exception excode) = 0;
virtual const void* EndCompile(u32* code_size, u32* far_code_size) = 0;
ALWAYS_INLINE bool IsHostRegAllocated(u32 r) const { return (m_host_regs[r].flags & HR_ALLOCATED) != 0; }
static const char* GetReadWriteModeString(u32 flags);
virtual const char* GetHostRegName(u32 reg) const = 0;
u32 GetFreeHostReg(u32 flags);
u32 AllocateHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
std::optional<u32> CheckHostReg(u32 flags, HostRegAllocType type = HR_TYPE_TEMP, Reg reg = Reg::count);
u32 AllocateTempHostReg(u32 flags = 0);
void SwapHostRegAlloc(u32 lhs, u32 rhs);
void FlushHostReg(u32 reg);
void FreeHostReg(u32 reg);
void ClearHostReg(u32 reg);
void MarkRegsNeeded(HostRegAllocType type, Reg reg);
void RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg);
void ClearHostRegNeeded(u32 reg);
void ClearHostRegsNeeded();
void DeleteMIPSReg(Reg reg, bool flush);
bool TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other);
void UpdateHostRegCounters();
virtual void LoadHostRegWithConstant(u32 reg, u32 val) = 0;
virtual void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) = 0;
virtual void StoreConstantToCPUPointer(u32 val, const void* ptr) = 0;
virtual void StoreHostRegToCPUPointer(u32 reg, const void* ptr) = 0;
virtual void CopyHostReg(u32 dst, u32 src) = 0;
virtual void Flush(u32 flags);
/// Returns true if there is a load delay which will be stored at the end of the instruction.
bool HasLoadDelay() const { return m_load_delay_register != Reg::count; }
/// Cancels any pending load delay to the specified register.
void CancelLoadDelaysToReg(Reg reg);
/// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
void UpdateLoadDelay();
/// Flushes the load delay, i.e. writes it to the destination register.
void FinishLoadDelay();
/// Flushes the load delay, but only if it matches the specified register.
void FinishLoadDelayToReg(Reg reg);
/// Uses a caller-saved register for load delays when PGXP is enabled.
u32 GetFlagsForNewLoadDelayedReg() const;
void BackupHostState();
void RestoreHostState();
/// Registers loadstore for possible backpatching.
void AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register,
MemoryAccessSize size, bool is_signed, bool is_load);
void CompileInstruction();
void CompileBranchDelaySlot(bool dirty_pc = true);
void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags),
const void* pgxp_cpu_func, u32 tflags);
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool,
const std::optional<VirtualMemoryAddress>&),
MemoryAccessSize size, bool store, bool sign, u32 tflags);
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store);
void CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move);
virtual void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) = 0;
virtual void Compile_Fallback() = 0;
void Compile_j();
virtual void Compile_jr(CompileFlags cf) = 0;
void Compile_jr_const(CompileFlags cf);
void Compile_jal();
virtual void Compile_jalr(CompileFlags cf) = 0;
void Compile_jalr_const(CompileFlags cf);
void Compile_syscall();
void Compile_break();
void Compile_b_const(CompileFlags cf);
void Compile_b(CompileFlags cf);
void Compile_blez(CompileFlags cf);
void Compile_blez_const(CompileFlags cf);
void Compile_bgtz(CompileFlags cf);
void Compile_bgtz_const(CompileFlags cf);
void Compile_beq(CompileFlags cf);
void Compile_beq_const(CompileFlags cf);
void Compile_bne(CompileFlags cf);
void Compile_bne_const(CompileFlags cf);
virtual void Compile_bxx(CompileFlags cf, BranchCondition cond) = 0;
void Compile_bxx_const(CompileFlags cf, BranchCondition cond);
void Compile_sll_const(CompileFlags cf);
virtual void Compile_sll(CompileFlags cf) = 0;
void Compile_srl_const(CompileFlags cf);
virtual void Compile_srl(CompileFlags cf) = 0;
void Compile_sra_const(CompileFlags cf);
virtual void Compile_sra(CompileFlags cf) = 0;
void Compile_sllv_const(CompileFlags cf);
virtual void Compile_sllv(CompileFlags cf) = 0;
void Compile_srlv_const(CompileFlags cf);
virtual void Compile_srlv(CompileFlags cf) = 0;
void Compile_srav_const(CompileFlags cf);
virtual void Compile_srav(CompileFlags cf) = 0;
void Compile_mult_const(CompileFlags cf);
virtual void Compile_mult(CompileFlags cf) = 0;
void Compile_multu_const(CompileFlags cf);
virtual void Compile_multu(CompileFlags cf) = 0;
void Compile_div_const(CompileFlags cf);
virtual void Compile_div(CompileFlags cf) = 0;
void Compile_divu_const(CompileFlags cf);
virtual void Compile_divu(CompileFlags cf) = 0;
void Compile_add_const(CompileFlags cf);
virtual void Compile_add(CompileFlags cf) = 0;
void Compile_addu_const(CompileFlags cf);
virtual void Compile_addu(CompileFlags cf) = 0;
void Compile_sub_const(CompileFlags cf);
virtual void Compile_sub(CompileFlags cf) = 0;
void Compile_subu_const(CompileFlags cf);
virtual void Compile_subu(CompileFlags cf) = 0;
void Compile_and_const(CompileFlags cf);
virtual void Compile_and(CompileFlags cf) = 0;
void Compile_or_const(CompileFlags cf);
virtual void Compile_or(CompileFlags cf) = 0;
void Compile_xor_const(CompileFlags cf);
virtual void Compile_xor(CompileFlags cf) = 0;
void Compile_nor_const(CompileFlags cf);
virtual void Compile_nor(CompileFlags cf) = 0;
void Compile_slt_const(CompileFlags cf);
virtual void Compile_slt(CompileFlags cf) = 0;
void Compile_sltu_const(CompileFlags cf);
virtual void Compile_sltu(CompileFlags cf) = 0;
void Compile_addi_const(CompileFlags cf);
virtual void Compile_addi(CompileFlags cf) = 0;
void Compile_addiu_const(CompileFlags cf);
virtual void Compile_addiu(CompileFlags cf) = 0;
void Compile_slti_const(CompileFlags cf);
virtual void Compile_slti(CompileFlags cf) = 0;
void Compile_sltiu_const(CompileFlags cf);
virtual void Compile_sltiu(CompileFlags cf) = 0;
void Compile_andi_const(CompileFlags cf);
virtual void Compile_andi(CompileFlags cf) = 0;
void Compile_ori_const(CompileFlags cf);
virtual void Compile_ori(CompileFlags cf) = 0;
void Compile_xori_const(CompileFlags cf);
virtual void Compile_xori(CompileFlags cf) = 0;
void Compile_lui();
virtual void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0; // lwl/lwr
virtual void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
virtual void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0; // swl/swr
virtual void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) = 0;
static u32* GetCop0RegPtr(Cop0Reg reg);
static u32 GetCop0RegWriteMask(Cop0Reg reg);
void Compile_mfc0(CompileFlags cf);
virtual void Compile_mtc0(CompileFlags cf) = 0;
virtual void Compile_rfe(CompileFlags cf) = 0;
void AddGTETicks(TickCount ticks);
void StallUntilGTEComplete();
virtual void Compile_mfc2(CompileFlags cf) = 0;
virtual void Compile_mtc2(CompileFlags cf) = 0;
virtual void Compile_cop2(CompileFlags cf) = 0;
enum GTERegisterAccessAction : u8
{
Ignore,
Direct,
ZeroExtend16,
SignExtend16,
CallHandler,
PushFIFO,
};
static std::pair<u32*, GTERegisterAccessAction> GetGTERegisterPointer(u32 index, bool writing);
CodeCache::Block* m_block = nullptr;
u32 m_compiler_pc = 0;
TickCount m_cycles = 0;
TickCount m_gte_done_cycle = 0;
const Instruction* inst = nullptr;
const CodeCache::InstructionInfo* iinfo = nullptr;
u32 m_current_instruction_pc = 0;
bool m_current_instruction_branch_delay_slot = false;
bool m_branch_delay_slot_swapped = false;
bool m_dirty_pc = false;
bool m_dirty_instruction_bits = false;
bool m_dirty_gte_done_cycle = false;
bool m_block_ended = false;
std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_valid = {};
std::bitset<static_cast<size_t>(Reg::count)> m_constant_regs_dirty = {};
std::array<u32, static_cast<size_t>(Reg::count)> m_constant_reg_values = {};
std::array<HostRegAlloc, NUM_HOST_REGS> m_host_regs = {};
u16 m_register_alloc_counter = 0;
bool m_load_delay_dirty = true;
Reg m_load_delay_register = Reg::count;
u32 m_load_delay_value_register = 0;
Reg m_next_load_delay_register = Reg::count;
u32 m_next_load_delay_value_register = 0;
struct HostStateBackup
{
TickCount cycles;
TickCount gte_done_cycle;
u32 compiler_pc;
bool dirty_pc;
bool dirty_instruction_bits;
bool dirty_gte_done_cycle;
bool block_ended;
const Instruction* inst;
const CodeCache::InstructionInfo* iinfo;
u32 current_instruction_pc;
bool current_instruction_delay_slot;
std::bitset<static_cast<size_t>(Reg::count)> const_regs_valid;
std::bitset<static_cast<size_t>(Reg::count)> const_regs_dirty;
std::array<u32, static_cast<size_t>(Reg::count)> const_regs_values;
std::array<HostRegAlloc, NUM_HOST_REGS> host_regs;
u16 register_alloc_counter;
bool load_delay_dirty;
Reg load_delay_register;
u32 load_delay_value_register;
Reg next_load_delay_register;
u32 next_load_delay_value_register;
};
// we need two of these, one for branch delays, and another if we have an overflow in the delay slot
std::array<HostStateBackup, 2> m_host_state_backup = {};
u32 m_host_state_backup_count = 0;
// PGXP memory callbacks
static const std::array<std::array<const void*, 2>, 3> s_pgxp_mem_load_functions;
static const std::array<const void*, 3> s_pgxp_mem_store_functions;
};
void BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info);
u32 CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, TickCount cycles_to_add,
TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register,
MemoryAccessSize size, bool is_signed, bool is_load);
extern Compiler* g_compiler;
} // namespace CPU::NewRec

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,164 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <memory>
#include "vixl/aarch64/assembler-aarch64.h"
namespace CPU::NewRec {
class AArch64Compiler final : public Compiler
{
public:
AArch64Compiler();
~AArch64Compiler() override;
protected:
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateICacheCheckAndUpdate() override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc, bool do_event_test) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test);
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
void Compile_Fallback() override;
void CheckBranchTarget(const vixl::aarch64::WRegister& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
void Compile_addi(CompileFlags cf, bool overflow);
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
void Compile_shift(CompileFlags cf, void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&, unsigned));
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf,
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&,
const vixl::aarch64::Register&),
void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&, unsigned));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
void TestOverflow(const vixl::aarch64::WRegister& result);
void Compile_dst_op(CompileFlags cf,
void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
const vixl::aarch64::Register&,
const vixl::aarch64::Operand&),
bool commutative, bool logical, bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
vixl::aarch64::WRegister
ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const vixl::aarch64::WRegister>& reg = std::nullopt);
template<typename RegAllocFn>
vixl::aarch64::WRegister GenerateLoad(const vixl::aarch64::WRegister& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const vixl::aarch64::WRegister& addr_reg, const vixl::aarch64::WRegister& value_reg,
MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void TestInterrupts(const vixl::aarch64::WRegister& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
void EmitMov(const vixl::aarch64::WRegister& dst, u32 val);
void EmitCall(const void* ptr, bool force_inline = false);
vixl::aarch64::Operand armCheckAddSubConstant(s32 val);
vixl::aarch64::Operand armCheckAddSubConstant(u32 val);
vixl::aarch64::Operand armCheckCompareConstant(s32 val);
vixl::aarch64::Operand armCheckLogicalConstant(u32 val);
void SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
void SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit);
void SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero);
void SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond = vixl::aarch64::Condition::al);
void AssertRegOrConstS(CompileFlags cf) const;
void AssertRegOrConstT(CompileFlags cf) const;
vixl::aarch64::MemOperand MipsPtr(Reg r) const;
vixl::aarch64::WRegister CFGetRegD(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegS(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegT(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegLO(CompileFlags cf) const;
vixl::aarch64::WRegister CFGetRegHI(CompileFlags cf) const;
void MoveSToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
void MoveTToReg(const vixl::aarch64::WRegister& dst, CompileFlags cf);
void MoveMIPSRegToReg(const vixl::aarch64::WRegister& dst, Reg reg);
std::unique_ptr<vixl::aarch64::Assembler> m_emitter;
std::unique_ptr<vixl::aarch64::Assembler> m_far_emitter;
vixl::aarch64::Assembler* armAsm;
#ifdef VIXL_DEBUG
std::unique_ptr<vixl::CodeBufferCheckScope> m_emitter_check;
std::unique_ptr<vixl::CodeBufferCheckScope> m_far_emitter_check;
#endif
};
} // namespace CPU::NewRec

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <memory>
namespace CPU::NewRec {
// RISC-V 64-bit backend for the experimental "new" recompiler. Implements the
// architecture-neutral Compiler interface and emits host code through the
// biscuit RISC-V assembler. Two emitters are kept (near/far code buffers);
// rvAsm points at whichever one is currently active — presumably swapped by
// SwitchToFarCode()/SwitchToNearCode() (TODO confirm against the .cpp).
class RISCV64Compiler final : public Compiler
{
public:
RISCV64Compiler();
~RISCV64Compiler() override;
protected:
// --- Host register access and code-buffer plumbing (Compiler overrides) ---
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
// Re-initializes the emitters for a fresh block, targeting the supplied
// near and far code buffers.
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
u32 far_code_space) override;
// --- Block prologue/epilogue generation ---
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateICacheCheckAndUpdate() override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc, bool do_event_test) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test);
// Finalizes both emitters; reports the near/far code sizes to the caller.
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
// Slow path for guest instructions without a dedicated fast compiler below.
void Compile_Fallback() override;
// --- Branches ---
void CheckBranchTarget(const biscuit::GPR& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
// --- ALU, immediate forms ---
// Non-override overloads (extra bool parameter) are shared helpers for the
// pairs of guest ops that differ only in overflow/sign handling.
void Compile_addi(CompileFlags cf, bool overflow);
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
// --- Shifts ---
// Generic shift helpers parameterized on the biscuit register/immediate
// emitter member functions to use.
void Compile_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
// --- Multiply / divide ---
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
// --- ALU, register forms ---
void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard);
// Shared rd = rs OP rt helper, parameterized on register, immediate, and
// wide (64-bit) emitters; commutative/overflow select the applicable forms.
void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative,
bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
// --- Memory access ---
// Materializes the effective address for a load/store, optionally into a
// caller-provided register.
biscuit::GPR ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const biscuit::GPR>& reg = std::nullopt);
template<typename RegAllocFn>
void GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, const RegAllocFn& dst_reg_alloc);
void GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
// --- Coprocessor 0/2 ---
void TestInterrupts(const biscuit::GPR& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
// --- Backend-local emission helpers ---
void EmitMov(const biscuit::GPR& dst, u32 val);
void EmitCall(const void* ptr);
// Far-code switching takes an optional inverted branch-with-label emitter
// (RISC-V conditional branches compare two registers) plus its operands.
void SwitchToFarCode(bool emit_jump,
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) = nullptr,
const biscuit::GPR& rs1 = biscuit::zero, const biscuit::GPR& rs2 = biscuit::zero);
void SwitchToNearCode(bool emit_jump);
void AssertRegOrConstS(CompileFlags cf) const;
void AssertRegOrConstT(CompileFlags cf) const;
// NOTE(review): leftover from the AArch64 backend; consider deleting.
// vixl::aarch64::MemOperand MipsPtr(Reg r) const;
// "Safe" immediate-form helpers: presumably fall back to a register op when
// the immediate does not fit the RISC-V I-type encoding — TODO confirm.
void SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
void (biscuit::Assembler::*iop)(biscuit::GPR, biscuit::GPR, u32),
void (biscuit::Assembler::*rop)(biscuit::GPR, biscuit::GPR, biscuit::GPR));
void SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
void SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm);
// Sign/zero extension helpers (S/U = signed/unsigned, B/H/W = byte/half/word,
// D prefix = result widened to doubleword).
void EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
void EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs);
// CompileFlags -> host register lookups (D/S/T = MIPS rd/rs/rt; LO/HI =
// multiply/divide result registers).
biscuit::GPR CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg);
biscuit::GPR CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg);
biscuit::GPR CFGetRegD(CompileFlags cf) const;
biscuit::GPR CFGetRegS(CompileFlags cf) const;
biscuit::GPR CFGetRegT(CompileFlags cf) const;
biscuit::GPR CFGetRegLO(CompileFlags cf) const;
biscuit::GPR CFGetRegHI(CompileFlags cf) const;
void MoveSToReg(const biscuit::GPR& dst, CompileFlags cf);
void MoveTToReg(const biscuit::GPR& dst, CompileFlags cf);
void MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg);
// Near/far emitters; rvAsm is the currently-active one.
std::unique_ptr<biscuit::Assembler> m_emitter;
std::unique_ptr<biscuit::Assembler> m_far_emitter;
biscuit::Assembler* rvAsm;
};
} // namespace CPU::NewRec

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,140 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <initializer_list>
#include <memory>
namespace CPU::NewRec {
// x86-64 backend for the experimental "new" recompiler. Implements the
// architecture-neutral Compiler interface and emits host code through the
// Xbyak assembler. Two emitters are kept (near/far code buffers); cg points
// at whichever one is currently active — presumably swapped by
// SwitchToFarCode()/SwitchToNearCode() (TODO confirm against the .cpp).
class X64Compiler final : public Compiler
{
public:
X64Compiler();
~X64Compiler() override;
protected:
// --- Host register access and code-buffer plumbing (Compiler overrides) ---
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
// Re-initializes the emitters for a fresh block, targeting the supplied
// near and far code buffers.
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
u32 far_code_space) override;
// --- Block prologue/epilogue generation ---
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateICacheCheckAndUpdate() override;
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc, bool do_event_test) override;
void EndBlockWithException(Exception excode) override;
void EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test);
// Finalizes both emitters; reports the near/far code sizes to the caller.
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
// Slow path for guest instructions without a dedicated fast compiler below.
void Compile_Fallback() override;
// --- Branches ---
void CheckBranchTarget(const Xbyak::Reg32& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
// --- ALU, immediate forms ---
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
// Non-override overload (extra bool) is a shared helper for the signed and
// unsigned set-less-than-immediate pair.
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
// --- Shifts ---
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
// Generic variable-shift helper parameterized on the Xbyak cl-count and
// constant-count emitter member functions to use.
void Compile_variable_shift(CompileFlags cf,
void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Reg8&),
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
// --- Multiply / divide ---
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
// --- ALU, register forms ---
void TestOverflow(const Xbyak::Reg32& result);
// Shared rd = rs OP rt helper, parameterized on the register/register and
// register/immediate emitters; commutative/overflow select applicable forms.
void Compile_dst_op(CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&),
void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), bool commutative,
bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
// --- Memory access ---
// Materializes the effective address for a load/store, optionally into a
// caller-provided register.
Xbyak::Reg32 ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const Xbyak::Reg32>& reg = std::nullopt);
template<typename RegAllocFn>
Xbyak::Reg32 GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, MemoryAccessSize size);
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
// --- Coprocessor 0/2 ---
void TestInterrupts(const Xbyak::Reg32& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
// --- Backend-local emission helpers ---
void SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
void SwitchToNearCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*) = nullptr);
// Memory operand addressing a guest MIPS register within CPU state.
Xbyak::Address MipsPtr(Reg r) const;
// CompileFlags -> host register lookups (D/S/T = MIPS rd/rs/rt; LO/HI =
// multiply/divide result registers).
Xbyak::Reg32 CFGetRegD(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegS(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegT(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegLO(CompileFlags cf) const;
Xbyak::Reg32 CFGetRegHI(CompileFlags cf) const;
Xbyak::Reg32 MoveSToD(CompileFlags cf);
Xbyak::Reg32 MoveSToT(CompileFlags cf);
Xbyak::Reg32 MoveTToD(CompileFlags cf);
void MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf);
void MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf);
void MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg);
// Near/far emitters; cg is the currently-active one.
std::unique_ptr<Xbyak::CodeGenerator> m_emitter;
std::unique_ptr<Xbyak::CodeGenerator> m_far_emitter;
Xbyak::CodeGenerator* cg;
};
} // namespace CPU::NewRec

View File

@ -46,7 +46,8 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
constexpr u32 stack_size = 8;
#endif
DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
CodeGenerator acg(code_size, static_cast<u8*>(code));
CodeGenerator* cg = &acg;

View File

@ -6,6 +6,8 @@
#pragma once
#include "cpu_types.h"
#include <utility>
#if defined(CPU_ARCH_X64)
// We need to include windows.h before xbyak does..
@ -130,4 +132,39 @@ u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler
#elif defined(CPU_ARCH_RISCV64)
#include "biscuit/assembler.hpp"
namespace CPU::Recompiler {
// Shared emission helpers and register conventions for the RISC-V 64-bit
// recompiler backend, built on the biscuit assembler.
// A reasonable "maximum" number of bytes per instruction.
constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Fixed host-register assignments: return value, call arguments, scratch,
// CPU-state pointer, and fastmem base. s10/s11 are callee-saved, so they
// survive calls out of generated code.
#define RRET biscuit::a0
#define RARG1 biscuit::a0
#define RARG2 biscuit::a1
#define RARG3 biscuit::a2
#define RSCRATCH biscuit::t6
#define RSTATE biscuit::s10
#define RMEMBASE biscuit::s11
// Returns true if host register id is caller-saved under the RISC-V ABI.
bool rvIsCallerSavedRegister(u32 id);
// Returns true if imm is encodable as a sign-extended I-type immediate
// (12 bits on RISC-V — TODO confirm against the implementation).
bool rvIsValidSExtITypeImm(u32 imm);
// Splits the displacement from cur to target into (hi20, lo12) immediates
// suitable for an AUIPC+offset pair — presumably; verify against callers.
std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
// NOTE: parameters below were previously named armAsm — a copy/paste remnant
// from the AArch64 helpers. Renamed to rvAsm for consistency; declaration
// parameter names are not part of the function type, so this is ABI-safe.
void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr);
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
// Emit a jump/call to ptr; returns a u32 (presumably the number of bytes
// emitted — TODO confirm). link_reg == zero makes rvEmitJmp a plain jump.
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
// Sign/zero extension helpers (S/U = signed/unsigned, B/H/W = byte/half/word;
// the D-prefixed forms widen to a doubleword result).
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
} // namespace CPU::Recompiler
#endif

View File

@ -370,6 +370,11 @@ void ImGuiManager::DrawPerformanceOverlay()
text.append_fmt("{}{}", first ? "" : "/", "CI");
first = false;
}
else if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
{
text.append_fmt("{}{}", first ? "" : "/", "NR");
first = false;
}
else
{
if (g_settings.cpu_recompiler_icache)

View File

@ -834,11 +834,13 @@ const char* Settings::GetDiscRegionDisplayName(DiscRegion region)
return Host::TranslateToCString("DiscRegion", s_disc_region_display_names[static_cast<int>(region)]);
}
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler"};
static constexpr const std::array s_cpu_execution_mode_names = {"Interpreter", "CachedInterpreter", "Recompiler",
"NewRec"};
static constexpr const std::array s_cpu_execution_mode_display_names = {
TRANSLATE_NOOP("CPUExecutionMode", "Interpreter (Slowest)"),
TRANSLATE_NOOP("CPUExecutionMode", "Cached Interpreter (Faster)"),
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)")};
TRANSLATE_NOOP("CPUExecutionMode", "Recompiler (Fastest)"),
TRANSLATE_NOOP("CPUExecutionMode", "New Recompiler (Experimental)")};
std::optional<CPUExecutionMode> Settings::ParseCPUExecutionMode(const char* str)
{

View File

@ -414,7 +414,7 @@ struct Settings
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f;
#ifdef ENABLE_RECOMPILER
#if defined(ENABLE_RECOMPILER)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
// LUT still ends up faster on Apple Silicon for now, because of 16K pages.
@ -423,6 +423,9 @@ struct Settings
#else
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT;
#endif
#elif defined(ENABLE_NEWREC)
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled;

View File

@ -3532,7 +3532,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
CPU::ClearICache();
}
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler &&
if (CPU::CodeCache::IsUsingAnyRecompiler() &&
(g_settings.cpu_recompiler_memory_exceptions != old_settings.cpu_recompiler_memory_exceptions ||
g_settings.cpu_recompiler_block_linking != old_settings.cpu_recompiler_block_linking ||
g_settings.cpu_recompiler_icache != old_settings.cpu_recompiler_icache ||

View File

@ -46,6 +46,7 @@ enum class CPUExecutionMode : u8
Interpreter,
CachedInterpreter,
Recompiler,
NewRec,
Count
};