CPU/NewRec: Add AArch32 backend

This commit is contained in:
Stenzek 2023-10-21 16:23:01 +10:00
parent 8ddb0c4b23
commit b3cbe5a7ee
No known key found for this signature in database
13 changed files with 2526 additions and 47 deletions

View File

@ -23,7 +23,6 @@ endif()
# Renderer options. # Renderer options.
option(ENABLE_OPENGL "Build with OpenGL renderer" ON) option(ENABLE_OPENGL "Build with OpenGL renderer" ON)
option(ENABLE_VULKAN "Build with Vulkan renderer" ON) option(ENABLE_VULKAN "Build with Vulkan renderer" ON)
option(ENABLE_NEWREC "Build with experimental new dynarec (needed for RISC-V)" ON)
# Global options. # Global options.
if(NOT ANDROID) if(NOT ANDROID)

View File

@ -43,7 +43,7 @@ class CodeBuffer {
~CodeBuffer(); ~CodeBuffer();
void Reset(); void Reset();
void Reset(byte* buffer, size_t capacity); void Reset(byte* buffer, size_t capacity, bool managed = false);
#ifdef VIXL_CODE_BUFFER_MMAP #ifdef VIXL_CODE_BUFFER_MMAP
void SetExecutable(); void SetExecutable();

View File

@ -156,10 +156,11 @@ void CodeBuffer::Reset() {
SetClean(); SetClean();
} }
void CodeBuffer::Reset(byte* buffer, size_t capacity) { void CodeBuffer::Reset(byte* buffer, size_t capacity, bool managed) {
buffer_ = buffer; buffer_ = buffer;
cursor_ = buffer; cursor_ = buffer;
capacity_ = capacity; capacity_ = capacity;
managed_ = managed;
} }
void CodeBuffer::Grow(size_t new_capacity) { void CodeBuffer::Grow(size_t new_capacity) {

View File

@ -1,2 +1,3 @@
add_library(xbyak INTERFACE) add_library(xbyak INTERFACE)
target_include_directories(xbyak INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/xbyak") target_include_directories(xbyak INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/xbyak")
target_compile_definitions(xbyak INTERFACE "XBYAK_NO_EXCEPTION=1")

View File

@ -133,54 +133,40 @@ target_link_libraries(core PUBLIC Threads::Threads common util zlib)
target_link_libraries(core PRIVATE stb xxhash imgui rapidjson rcheevos) target_link_libraries(core PRIVATE stb xxhash imgui rapidjson rcheevos)
if(${CPU_ARCH} STREQUAL "x64") if(${CPU_ARCH} STREQUAL "x64")
target_compile_definitions(core PUBLIC "XBYAK_NO_EXCEPTION=1" "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES}
cpu_recompiler_code_generator_x64.cpp cpu_recompiler_code_generator_x64.cpp
)
target_link_libraries(core PRIVATE xbyak zydis)
message("Building x64 recompiler")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_x64.cpp cpu_newrec_compiler_x64.cpp
cpu_newrec_compiler_x64.h cpu_newrec_compiler_x64.h
) )
message("Building x64 newrec") target_link_libraries(core PRIVATE xbyak zydis)
endif() message("Building x64 recompiler")
elseif(${CPU_ARCH} STREQUAL "aarch32") elseif(${CPU_ARCH} STREQUAL "aarch32")
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES}
cpu_recompiler_code_generator_aarch32.cpp cpu_recompiler_code_generator_aarch32.cpp
cpu_newrec_compiler_aarch32.cpp
cpu_newrec_compiler_aarch32.h
) )
target_link_libraries(core PUBLIC vixl) target_link_libraries(core PUBLIC vixl)
message("Building AArch32 recompiler") message("Building AArch32 recompiler")
elseif(${CPU_ARCH} STREQUAL "aarch64") elseif(${CPU_ARCH} STREQUAL "aarch64")
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES}
cpu_recompiler_code_generator_aarch64.cpp cpu_recompiler_code_generator_aarch64.cpp
)
target_link_libraries(core PUBLIC vixl)
message("Building AArch64 recompiler")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_aarch64.cpp cpu_newrec_compiler_aarch64.cpp
cpu_newrec_compiler_aarch64.h cpu_newrec_compiler_aarch64.h
) )
message("Building AArch64 newrec") target_link_libraries(core PUBLIC vixl)
endif() message("Building AArch64 recompiler")
elseif(${CPU_ARCH} STREQUAL "riscv64") elseif(${CPU_ARCH} STREQUAL "riscv64")
target_compile_definitions(core PUBLIC "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1")
if(ENABLE_NEWREC)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1")
target_sources(core PRIVATE ${NEWREC_SOURCES} target_sources(core PRIVATE ${NEWREC_SOURCES}
cpu_newrec_compiler_riscv64.cpp cpu_newrec_compiler_riscv64.cpp
cpu_newrec_compiler_riscv64.h cpu_newrec_compiler_riscv64.h
) )
target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas) target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas)
message("Building RISC-V 64-bit newrec") message("Building RISC-V 64-bit recompiler")
endif()
else() else()
message("Not building recompiler") message("Not building recompiler")
endif() endif()

View File

@ -14,9 +14,15 @@
<ClCompile Include="cpu_disasm.cpp" /> <ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="cpu_code_cache.cpp" /> <ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" /> <ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch32.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp"> <ClCompile Include="cpu_newrec_compiler_aarch64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="cpu_newrec_compiler_riscv64.cpp">
<ExcludedFromBuild>true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_newrec_compiler_x64.cpp"> <ClCompile Include="cpu_newrec_compiler_x64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
@ -98,9 +104,15 @@
<ClInclude Include="cpu_disasm.h" /> <ClInclude Include="cpu_disasm.h" />
<ClInclude Include="cpu_code_cache.h" /> <ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_newrec_compiler.h" /> <ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_compiler_aarch32.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_newrec_compiler_aarch64.h"> <ClInclude Include="cpu_newrec_compiler_aarch64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClInclude> </ClInclude>
<ClInclude Include="cpu_newrec_compiler_riscv64.h">
<ExcludedFromBuild>true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="cpu_newrec_compiler_x64.h"> <ClInclude Include="cpu_newrec_compiler_x64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClInclude> </ClInclude>

View File

@ -63,6 +63,8 @@
<ClCompile Include="cpu_newrec_compiler.cpp" /> <ClCompile Include="cpu_newrec_compiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_x64.cpp" /> <ClCompile Include="cpu_newrec_compiler_x64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp" /> <ClCompile Include="cpu_newrec_compiler_aarch64.cpp" />
<ClCompile Include="cpu_newrec_compiler_riscv64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch32.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="types.h" /> <ClInclude Include="types.h" />
@ -131,5 +133,7 @@
<ClInclude Include="cpu_newrec_compiler.h" /> <ClInclude Include="cpu_newrec_compiler.h" />
<ClInclude Include="cpu_newrec_compiler_x64.h" /> <ClInclude Include="cpu_newrec_compiler_x64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h" /> <ClInclude Include="cpu_newrec_compiler_aarch64.h" />
<ClInclude Include="cpu_newrec_compiler_riscv64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch32.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -21,6 +21,9 @@ static constexpr bool SWAP_BRANCH_DELAY_SLOTS = true;
#if defined(CPU_ARCH_X64) #if defined(CPU_ARCH_X64)
static constexpr u32 NUM_HOST_REGS = 16; static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = true; static constexpr bool HAS_MEMORY_OPERANDS = true;
#elif defined(CPU_ARCH_ARM32)
static constexpr u32 NUM_HOST_REGS = 16;
static constexpr bool HAS_MEMORY_OPERANDS = false;
#elif defined(CPU_ARCH_ARM64) #elif defined(CPU_ARCH_ARM64)
static constexpr u32 NUM_HOST_REGS = 32; static constexpr u32 NUM_HOST_REGS = 32;
static constexpr bool HAS_MEMORY_OPERANDS = false; static constexpr bool HAS_MEMORY_OPERANDS = false;
@ -153,6 +156,7 @@ protected:
HR_TYPE_PC_WRITEBACK, HR_TYPE_PC_WRITEBACK,
HR_TYPE_LOAD_DELAY_VALUE, HR_TYPE_LOAD_DELAY_VALUE,
HR_TYPE_NEXT_LOAD_DELAY_VALUE, HR_TYPE_NEXT_LOAD_DELAY_VALUE,
HR_TYPE_MEMBASE,
}; };
struct HostRegAlloc struct HostRegAlloc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,164 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "cpu_newrec_compiler.h"
#include <memory>
#include "vixl/aarch32/assembler-aarch32.h"
#include "vixl/aarch32/operands-aarch32.h"
namespace CPU::NewRec {
// AArch32 implementation of the NewRec `Compiler` interface. Code is emitted through
// vixl's aarch32 `Assembler` (see the m_emitter / m_far_emitter members below).
// Declared `final`, so it cannot be derived from.
class AArch32Compiler final : public Compiler
{
public:
AArch32Compiler();
~AArch32Compiler() override;
protected:
// --- Compiler overrides: host register naming, code pointer, and host register load/store/copy. ---
const char* GetHostRegName(u32 reg) const override;
const void* GetCurrentCodePointer() override;
void LoadHostRegWithConstant(u32 reg, u32 val) override;
void LoadHostRegFromCPUPointer(u32 reg, const void* ptr) override;
void StoreConstantToCPUPointer(u32 val, const void* ptr) override;
void StoreHostRegToCPUPointer(u32 reg, const void* ptr) override;
void CopyHostReg(u32 dst, u32 src) override;
// --- Compiler overrides: per-block setup, checks, calls and block termination. ---
// Reset() receives two separate buffers (near code and far code), each with its own size.
void Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
u32 far_code_space) override;
void BeginBlock() override;
void GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) override;
void GenerateICacheCheckAndUpdate() override;
// The argNreg parameters default to -1; presumably -1 means "no argument" — confirm in the .cpp.
void GenerateCall(const void* func, s32 arg1reg = -1, s32 arg2reg = -1, s32 arg3reg = -1) override;
void EndBlock(const std::optional<u32>& newpc, bool do_event_test) override;
void EndBlockWithException(Exception excode) override;
// Not an override: backend-local helper taking the same arguments as EndBlock().
void EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test);
// Reports the near and far code sizes through the two out-pointers.
const void* EndCompile(u32* code_size, u32* far_code_size) override;
void Flush(u32 flags) override;
void Compile_Fallback() override;
// --- Branch instructions. CheckBranchTarget() is a backend-local helper (no `override`). ---
void CheckBranchTarget(const vixl::aarch32::Register& pcreg);
void Compile_jr(CompileFlags cf) override;
void Compile_jalr(CompileFlags cf) override;
void Compile_bxx(CompileFlags cf, BranchCondition cond) override;
// --- Immediate-operand instructions. ---
// The two-argument Compile_addi/Compile_slti overloads are non-virtual helpers; the one-argument
// overrides next to them presumably forward to these with a fixed flag — confirm in the .cpp.
void Compile_addi(CompileFlags cf, bool overflow);
void Compile_addi(CompileFlags cf) override;
void Compile_addiu(CompileFlags cf) override;
void Compile_slti(CompileFlags cf, bool sign);
void Compile_slti(CompileFlags cf) override;
void Compile_sltiu(CompileFlags cf) override;
void Compile_andi(CompileFlags cf) override;
void Compile_ori(CompileFlags cf) override;
void Compile_xori(CompileFlags cf) override;
// --- Shifts. The helpers take a pointer to the vixl Assembler member function to emit
// (signature: destination register, source register, Operand). ---
void Compile_shift(CompileFlags cf,
void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, vixl::aarch32::Register,
const vixl::aarch32::Operand&));
void Compile_sll(CompileFlags cf) override;
void Compile_srl(CompileFlags cf) override;
void Compile_sra(CompileFlags cf) override;
void Compile_variable_shift(CompileFlags cf,
void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, vixl::aarch32::Register,
const vixl::aarch32::Operand&));
void Compile_sllv(CompileFlags cf) override;
void Compile_srlv(CompileFlags cf) override;
void Compile_srav(CompileFlags cf) override;
// --- Multiply / divide. Compile_mult(cf, sign) is a non-virtual helper overload. ---
void Compile_mult(CompileFlags cf, bool sign);
void Compile_mult(CompileFlags cf) override;
void Compile_multu(CompileFlags cf) override;
void Compile_div(CompileFlags cf) override;
void Compile_divu(CompileFlags cf) override;
// --- Register-register ALU instructions. TestOverflow() and Compile_dst_op() are backend-local
// helpers; Compile_dst_op() takes the vixl Assembler member function to emit plus three flags. ---
void TestOverflow(const vixl::aarch32::Register& result);
void Compile_dst_op(CompileFlags cf,
void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, vixl::aarch32::Register,
const vixl::aarch32::Operand&),
bool commutative, bool logical, bool overflow);
void Compile_add(CompileFlags cf) override;
void Compile_addu(CompileFlags cf) override;
void Compile_sub(CompileFlags cf) override;
void Compile_subu(CompileFlags cf) override;
void Compile_and(CompileFlags cf) override;
void Compile_or(CompileFlags cf) override;
void Compile_xor(CompileFlags cf) override;
void Compile_nor(CompileFlags cf) override;
void Compile_slt(CompileFlags cf, bool sign);
void Compile_slt(CompileFlags cf) override;
void Compile_sltu(CompileFlags cf) override;
// --- Memory access. The first three declarations are backend-local helpers (no `override`). ---
// `address` is optional; presumably it is set when the effective address is known at compile
// time — confirm against the base Compiler. `reg` defaults to std::nullopt.
vixl::aarch32::Register
ComputeLoadStoreAddressArg(CompileFlags cf, const std::optional<VirtualMemoryAddress>& address,
const std::optional<const vixl::aarch32::Register>& reg = std::nullopt);
// `dst_reg_alloc` is a caller-supplied callable; the function returns a host register.
template<typename RegAllocFn>
vixl::aarch32::Register GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size, bool sign,
const RegAllocFn& dst_reg_alloc);
void GenerateStore(const vixl::aarch32::Register& addr_reg, const vixl::aarch32::Register& value_reg,
MemoryAccessSize size);
// The load/store overrides below all share one signature.
void Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
void Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) override;
// --- Coprocessor instructions. TestInterrupts() is a backend-local helper (no `override`). ---
void TestInterrupts(const vixl::aarch32::Register& sr);
void Compile_mtc0(CompileFlags cf) override;
void Compile_rfe(CompileFlags cf) override;
void Compile_mfc2(CompileFlags cf) override;
void Compile_mtc2(CompileFlags cf) override;
void Compile_cop2(CompileFlags cf) override;
// arg2reg/arg3reg default to Reg::count; presumably that value means "argument not used" — confirm in the .cpp.
void GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg = Reg::count,
Reg arg3reg = Reg::count) override;
private:
// --- Emission helpers private to this backend. ---
void EmitMov(const vixl::aarch32::Register& dst, u32 val);
void EmitCall(const void* ptr, bool force_inline = false);
// Each armCheck*Constant() helper turns an immediate into a vixl Operand.
// NOTE(review): presumably they fall back to a scratch register when the value is not
// encodable as an immediate — confirm in the .cpp.
vixl::aarch32::Operand armCheckAddSubConstant(s32 val);
vixl::aarch32::Operand armCheckAddSubConstant(u32 val);
vixl::aarch32::Operand armCheckCompareConstant(s32 val);
vixl::aarch32::Operand armCheckLogicalConstant(u32 val);
// Near/far code switching. The condition defaults to `al` in both directions.
void SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond = vixl::aarch32::ConditionType::al);
void SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit);
void SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg, bool nonzero);
void SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond = vixl::aarch32::ConditionType::al);
void AssertRegOrConstS(CompileFlags cf) const;
void AssertRegOrConstT(CompileFlags cf) const;
// Accessors returning the vixl operand/register for a guest register or for a CompileFlags field.
vixl::aarch32::MemOperand MipsPtr(Reg r) const;
vixl::aarch32::Register CFGetRegD(CompileFlags cf) const;
vixl::aarch32::Register CFGetRegS(CompileFlags cf) const;
vixl::aarch32::Register CFGetRegT(CompileFlags cf) const;
vixl::aarch32::Register CFGetRegLO(CompileFlags cf) const;
vixl::aarch32::Register CFGetRegHI(CompileFlags cf) const;
// Non-const, unlike the CFGetReg* accessors above.
vixl::aarch32::Register GetMembaseReg();
void MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf);
void MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf);
void MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg);
// Two assemblers are kept, matching the near/far buffers passed to Reset().
vixl::aarch32::Assembler m_emitter;
vixl::aarch32::Assembler m_far_emitter;
// Non-owning pointer to an assembler. Presumably it points at whichever of the two emitters
// above is currently active (see SwitchToFarCode/SwitchToNearCode) — confirm in the .cpp.
// It has no in-class initializer here.
vixl::aarch32::Assembler* armAsm;
// These members exist only when VIXL_DEBUG is defined.
#ifdef VIXL_DEBUG
std::unique_ptr<vixl::CodeBufferCheckScope> m_emitter_check;
std::unique_ptr<vixl::CodeBufferCheckScope> m_far_emitter_check;
#endif
};
} // namespace CPU::NewRec

View File

@ -16,7 +16,7 @@
#include <limits> #include <limits>
Log_SetChannel(CPU::NewRec); Log_SetChannel(CPU::NewRec);
#define PTR(x) vixl::aarch64::MemOperand(RSTATE, (u32)(((u8*)(x)) - ((u8*)&g_state))) #define PTR(x) vixl::aarch64::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state)))
namespace CPU::NewRec { namespace CPU::NewRec {
@ -1019,10 +1019,10 @@ void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)
Label done; Label done;
Label not_divide_by_zero; Label not_divide_by_zero;
armAsm->cbnz(rt, &not_divide_by_zero); armAsm->cbnz(rt, &not_divide_by_zero);
armAsm->cmp(rs, 0);
armAsm->mov(rhi, rs); // hi = num armAsm->mov(rhi, rs); // hi = num
EmitMov(rlo, 1); EmitMov(rlo, 1);
EmitMov(RWSCRATCH, static_cast<u32>(-1)); EmitMov(RWSCRATCH, static_cast<u32>(-1));
armAsm->cmp(rs, 0);
armAsm->csel(rlo, RWSCRATCH, rlo, ge); // lo = s >= 0 ? -1 : 1 armAsm->csel(rlo, RWSCRATCH, rlo, ge); // lo = s >= 0 ? -1 : 1
armAsm->b(&done); armAsm->b(&done);
@ -1328,7 +1328,7 @@ vixl::aarch64::WRegister CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::
{ {
DebugAssert(addr_reg.GetCode() != RWARG3.GetCode()); DebugAssert(addr_reg.GetCode() != RWARG3.GetCode());
armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT);
armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 8)); armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 3));
} }
const MemOperand mem = const MemOperand mem =
@ -1438,7 +1438,7 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::WRegister&
{ {
DebugAssert(addr_reg.GetCode() != RWARG3.GetCode()); DebugAssert(addr_reg.GetCode() != RWARG3.GetCode());
armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT);
armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 8)); armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 3));
} }
const MemOperand mem = const MemOperand mem =

View File

@ -25,8 +25,19 @@ namespace CPU::Recompiler {
constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers
constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes
constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE; constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
// Total size in bytes of the region that armGetJumpTrampoline() carves trampolines out of.
static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
// Maps a jump target to the byte offset (relative to s_trampoline_start_ptr) of the trampoline
// already emitted for it, so each target gets at most one trampoline.
static std::unordered_map<const void*, u32> s_trampoline_targets;
// Base address of the trampoline region; a trampoline's address is this pointer plus its offset.
static u8* s_trampoline_start_ptr = nullptr;
// Bytes of the trampoline region consumed so far; the next trampoline is emitted at this offset.
static u32 s_trampoline_used = 0;
} // namespace CPU::Recompiler } // namespace CPU::Recompiler
/// Returns true when the AArch32 core register with index `id` is one this backend treats as
/// caller-saved: r0-r3, r12 and r14. Every other index yields false.
bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
{
  // `id` is unsigned, so the r0-r3 range only needs an upper-bound check.
  return (id <= 3 ||             // r0-r3
          id == 12 || id == 14); // r12 (ip), r14 (lr)
}
s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
{ {
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -59,10 +70,19 @@ void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, cons
void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
{ {
// TODO: pooling const void* cur = armAsm->GetCursorAddress<const void*>();
s32 displacement = armGetPCDisplacement(cur, ptr);
bool use_bx = !armIsPCDisplacementInImmediateRange(displacement);
if (use_bx && !force_inline)
{
if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
{
displacement = armGetPCDisplacement(cur, trampoline);
use_bx = !armIsPCDisplacementInImmediateRange(displacement);
}
}
const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr); if (use_bx)
if (!armIsPCDisplacementInImmediateRange(displacement))
{ {
armMoveAddressToReg(armAsm, RSCRATCH, ptr); armMoveAddressToReg(armAsm, RSCRATCH, ptr);
armAsm->bx(RSCRATCH); armAsm->bx(RSCRATCH);
@ -76,10 +96,19 @@ void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* p
void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
{ {
// TODO: pooling const void* cur = armAsm->GetCursorAddress<const void*>();
s32 displacement = armGetPCDisplacement(cur, ptr);
bool use_blx = !armIsPCDisplacementInImmediateRange(displacement);
if (use_blx && !force_inline)
{
if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
{
displacement = armGetPCDisplacement(cur, trampoline);
use_blx = !armIsPCDisplacementInImmediateRange(displacement);
}
}
const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr); if (use_blx)
if (!armIsPCDisplacementInImmediateRange(displacement))
{ {
armMoveAddressToReg(armAsm, RSCRATCH, ptr); armMoveAddressToReg(armAsm, RSCRATCH, ptr);
armAsm->blx(RSCRATCH); armAsm->blx(RSCRATCH);
@ -91,6 +120,21 @@ void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void*
} }
} }
/// Emits a branch to `ptr` that is taken only when `cond` holds.
///
/// When the PC-relative displacement fits the immediate form, a single conditional `b` is emitted.
/// Otherwise the absolute address is loaded into RSCRATCH and a conditional `bx` is emitted; in
/// that case RSCRATCH is overwritten whether or not the branch ends up being taken.
void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr)
{
  const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr);
  if (armIsPCDisplacementInImmediateRange(displacement))
  {
    a32::Label label(displacement + armAsm->GetCursorOffset());
    armAsm->b(cond, &label);
    return;
  }

  // Out of immediate range: go through a register. This is a plain branch, not a call, so use
  // bx rather than blx -- blx would overwrite lr with a return address nobody uses. This also
  // matches what armEmitJmp() emits for its register-indirect path.
  armMoveAddressToReg(armAsm, RSCRATCH, ptr);
  armAsm->bx(cond, RSCRATCH);
}
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{ {
#ifdef ENABLE_HOST_DISASSEMBLY #ifdef ENABLE_HOST_DISASSEMBLY
@ -128,6 +172,36 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
return kA32InstructionSizeInBytes; return kA32InstructionSizeInBytes;
} }
/// Returns the address of a trampoline that jumps to `target`, creating it on first use.
///
/// A trampoline loads the absolute address of `target` into RSCRATCH and branches to it with `bx`.
/// Trampolines are cached per target in s_trampoline_targets and packed back to back inside the
/// region starting at s_trampoline_start_ptr. Panics when the region cannot hold another one.
u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
{
  // Upper bound, in bytes, reserved for a single trampoline (address load plus the jump).
  constexpr u32 MAX_TRAMPOLINE_SIZE = 20;

  // Reuse the trampoline if one was already emitted for this target.
  if (const auto it = s_trampoline_targets.find(target); it != s_trampoline_targets.end())
    return s_trampoline_start_ptr + it->second;

  // Trampolines are packed without extra alignment.
  const u32 offset = s_trampoline_used;

  if (TRAMPOLINE_AREA_SIZE - offset < MAX_TRAMPOLINE_SIZE)
  {
    Panic("Ran out of space in jump trampoline area");
    return nullptr;
  }

  u8* const start = s_trampoline_start_ptr + offset;
  a32::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
  armMoveAddressToReg(&armAsm, RSCRATCH, target);
  armAsm.bx(RSCRATCH);

  const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
  // Exactly MAX_TRAMPOLINE_SIZE bytes is fine: that is what the space check above reserved.
  DebugAssert(size <= MAX_TRAMPOLINE_SIZE);

  s_trampoline_targets.emplace(target, offset);
  s_trampoline_used = offset + size;

  // The freshly written instructions must be visible to instruction fetch before they are used.
  JitCodeBuffer::FlushInstructionCache(start, size);
  return start;
}
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
{ {
using namespace vixl::aarch32; using namespace vixl::aarch32;

View File

@ -78,6 +78,7 @@ constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
#define RRET vixl::aarch32::r0 #define RRET vixl::aarch32::r0
#define RRETHI vixl::aarch32::r1
#define RARG1 vixl::aarch32::r0 #define RARG1 vixl::aarch32::r0
#define RARG2 vixl::aarch32::r1 #define RARG2 vixl::aarch32::r1
#define RARG3 vixl::aarch32::r2 #define RARG3 vixl::aarch32::r2
@ -85,12 +86,15 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
#define RSCRATCH vixl::aarch32::r12 #define RSCRATCH vixl::aarch32::r12
#define RSTATE vixl::aarch32::r4 #define RSTATE vixl::aarch32::r4
bool armIsCallerSavedRegister(u32 id);
s32 armGetPCDisplacement(const void* current, const void* target); s32 armGetPCDisplacement(const void* current, const void* target);
bool armIsPCDisplacementInImmediateRange(s32 displacement); bool armIsPCDisplacementInImmediateRange(s32 displacement);
void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr); void armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, const void* addr);
void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm); void armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm);
void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline); void armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, const void* ptr);
u8* armGetJumpTrampoline(const void* target);
} // namespace CPU::Recompiler } // namespace CPU::Recompiler