CPU/Recompiler: Align dispatchers and JIT blocks
A couple of percent difference if we're lucky; in practice, probably <1%.
parent 82a843c121
commit 242561debf
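For context: the commit pads the code cache's free pointer up to the next power-of-two boundary before emitting each dispatcher and compiled block, routing the fill bytes through a per-backend EmitAlignmentPadding() (multi-byte NOPs on x86, zeros on ARM/RISC-V). The sketch below illustrates only the padding-size computation with a plain byte fill; the helper names (align_up_pow2, pad_code_cursor) are hypothetical and not part of the codebase.

```cpp
// Illustrative sketch, not the commit's code: advance a JIT code-buffer
// cursor to a power-of-two boundary. Fills with 0xCC (int3) on x86 so a
// stray jump into padding traps, and with zeros on other architectures.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

static std::uintptr_t align_up_pow2(std::uintptr_t value, std::uintptr_t alignment)
{
  assert((alignment & (alignment - 1)) == 0); // alignment must be a power of two
  return (value + (alignment - 1)) & ~(alignment - 1);
}

static std::uint8_t* pad_code_cursor(std::uint8_t* cursor, std::size_t alignment, std::size_t space_left)
{
  const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(cursor);
  std::size_t padding = static_cast<std::size_t>(align_up_pow2(addr, alignment) - addr);
  if (padding > space_left) // never pad past the end of the buffer
    padding = space_left;

#if defined(__x86_64__) || defined(_M_X64)
  std::memset(cursor, 0xCC, padding); // int3 filler
#else
  std::memset(cursor, 0x00, padding);
#endif
  return cursor + padding;
}
```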
@@ -1492,18 +1492,15 @@ void CPU::CodeCache::CommitFarCode(u32 length)
void CPU::CodeCache::AlignCode(u32 alignment)
{
#if defined(CPU_ARCH_X64)
  constexpr u8 padding_value = 0xcc; // int3
#else
  constexpr u8 padding_value = 0x00;
#endif

  DebugAssert(Common::IsPow2(alignment));
  const u32 num_padding_bytes =
    std::min(static_cast<u32>(Common::AlignUpPow2(reinterpret_cast<uintptr_t>(s_free_code_ptr), alignment) -
                              reinterpret_cast<uintptr_t>(s_free_code_ptr)),
             GetFreeCodeSpace());
  std::memset(s_free_code_ptr, padding_value, num_padding_bytes);

  if (num_padding_bytes > 0)
    EmitAlignmentPadding(s_free_code_ptr, num_padding_bytes);

  s_free_code_ptr += num_padding_bytes;
  s_code_used += num_padding_bytes;
}

@@ -247,6 +247,7 @@ bool HasPreviouslyFaultedOnPC(u32 guest_pc);
u32 EmitASMFunctions(void* code, u32 code_size);
u32 EmitJump(void* code, const void* dst, bool flush_icache);
void EmitAlignmentPadding(void* dst, size_t size);

void DisassembleAndLogHostCode(const void* start, u32 size);
u32 GetHostInstructionCount(const void* start, u32 size);

@@ -34,7 +34,7 @@ CPU::Recompiler::Recompiler::Recompiler() = default;
CPU::Recompiler::Recompiler::~Recompiler() = default;

void CPU::Recompiler::Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
                                        u8* far_code_buffer, u32 far_code_space)
{
  m_block = block;
  m_compiler_pc = block->pc;
@@ -101,10 +101,12 @@ void CPU::Recompiler::Recompiler::BeginBlock()
}

const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u32* host_code_size,
                                                      u32* host_far_code_size)
{
  Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(),
        CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace());
  CodeCache::AlignCode(FUNCTION_ALIGNMENT);

  Reset(block, CodeCache::GetFreeCodePointer(), CodeCache::GetFreeCodeSpace(), CodeCache::GetFreeFarCodePointer(),
        CodeCache::GetFreeFarCodeSpace());

  DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);

@@ -144,8 +146,8 @@ const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u
  const void* code = EndCompile(&code_size, &far_code_size);
  *host_code_size = code_size;
  *host_far_code_size = far_code_size;
  CPU::CodeCache::CommitCode(code_size);
  CPU::CodeCache::CommitFarCode(far_code_size);
  CodeCache::CommitCode(code_size);
  CodeCache::CommitFarCode(far_code_size);

  return code;
}
@@ -651,7 +653,7 @@ const char* CPU::Recompiler::Recompiler::GetReadWriteModeString(u32 flags)
}

u32 CPU::Recompiler::Recompiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                 Reg reg /* = Reg::count */)
{
  // Cancel any load delays before booting anything out
  if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE))
@@ -753,7 +755,7 @@ u32 CPU::Recompiler::Recompiler::AllocateHostReg(u32 flags, HostRegAllocType typ
}

std::optional<u32> CPU::Recompiler::Recompiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                             Reg reg /* = Reg::count */)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
@@ -1158,7 +1160,8 @@ void CPU::Recompiler::Recompiler::RestoreHostState()
}

void CPU::Recompiler::Recompiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register,
                                                   u32 data_register, MemoryAccessSize size, bool is_signed, bool is_load)
                                                   u32 data_register, MemoryAccessSize size, bool is_signed,
                                                   bool is_load)
{
  DebugAssert(CodeCache::IsUsingFastmem());
  DebugAssert(address_register < NUM_HOST_REGS);
@@ -1367,8 +1370,8 @@ void CPU::Recompiler::Recompiler::CompileBranchDelaySlot(bool dirty_pc /* = true
}

void CPU::Recompiler::Recompiler::CompileTemplate(void (Recompiler::*const_func)(CompileFlags),
                                                  void (Recompiler::*func)(CompileFlags), const void* pgxp_cpu_func,
                                                  u32 tflags)
{
  // TODO: This is where we will do memory operand optimization. Remember to kill constants!
  // TODO: Swap S and T if commutative
@@ -1733,7 +1736,7 @@ const TickCount* CPU::Recompiler::Recompiler::GetFetchMemoryAccessTimePtr() cons
}

void CPU::Recompiler::Recompiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
                                                    bool use_fastmem)
{
  if (use_fastmem)
    return;

@@ -34,6 +34,9 @@ public:
  static constexpr u32 NUM_HOST_REGS = 16;
  static constexpr bool HAS_MEMORY_OPERANDS = true;

  // Align functions to 16 bytes.
  static constexpr u32 FUNCTION_ALIGNMENT = 16;

#elif defined(CPU_ARCH_ARM32)

  // A reasonable "maximum" number of bytes per instruction.
@@ -44,6 +47,9 @@ public:
  static constexpr u32 NUM_HOST_REGS = 16;
  static constexpr bool HAS_MEMORY_OPERANDS = false;

  // Align functions to 4 bytes (word size).
  static constexpr u32 FUNCTION_ALIGNMENT = 16;

#elif defined(CPU_ARCH_ARM64)

  // A reasonable "maximum" number of bytes per instruction.
@@ -56,6 +62,9 @@ public:
  static constexpr u32 NUM_HOST_REGS = 32;
  static constexpr bool HAS_MEMORY_OPERANDS = false;

  // Align functions to 16 bytes.
  static constexpr u32 FUNCTION_ALIGNMENT = 16;

#elif defined(CPU_ARCH_RISCV64)

  // Number of host registers.
@@ -68,6 +77,9 @@ public:
  static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
  static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;

  // Align functions to 16 bytes.
  static constexpr u32 FUNCTION_ALIGNMENT = 16;

#endif

public:

@@ -320,14 +320,17 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)

  armAsm->FinalizeCode();

#if 0
  // TODO: align?
  s_trampoline_targets.clear();
  s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
  s_trampoline_used = 0;
#endif

  return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/;
  return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
}

void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
{
  constexpr u8 padding_value = 0x00;
  std::memset(dst, padding_value, size);
}

CPU::ARM32Recompiler::ARM32Recompiler() : m_emitter(A32), m_far_emitter(A32)
@@ -1025,7 +1028,8 @@ void CPU::ARM32Recompiler::Flush(u32 flags)

void CPU::ARM32Recompiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
              inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);

@@ -41,19 +41,20 @@ LOG_CHANNEL(Recompiler);
#define RSTATE vixl::aarch64::x19
#define RMEMBASE vixl::aarch64::x20

bool armIsCallerSavedRegister(u32 id);
s64 armGetPCDisplacement(const void* current, const void* target);
bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
                    bool sign_extend_word = false);
void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
                     const vixl::aarch64::Register& tempreg = RXSCRATCH);
u8* armGetJumpTrampoline(const void* target);
static bool armIsCallerSavedRegister(u32 id);
static s64 armGetPCDisplacement(const void* current, const void* target);
static bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
static void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
static void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
static void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
static void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
static void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
static void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
                           bool sign_extend_word = false);
static void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
                            const vixl::aarch64::Register& tempreg = RXSCRATCH);
static u8* armGetJumpTrampoline(const void* target);
static void armAlignCode(vixl::aarch64::Assembler* armAsm, size_t alignment);

static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
static std::unordered_map<const void*, u32> s_trampoline_targets;
@@ -327,8 +328,8 @@ void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Regis
  armAsm->ldr(reg, memop);
}

void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
                     const vixl::aarch64::Register& tempreg)
[[maybe_unused]] void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
                                      const void* addr, const vixl::aarch64::Register& tempreg)
{
  DebugAssert(tempreg.IsX());

@@ -359,7 +360,7 @@ u8* armGetJumpTrampoline(const void* target)
    return s_trampoline_start_ptr + it->second;

  // align to 16 bytes?
  const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
  const u32 offset = Common::AlignUpPow2(s_trampoline_used, CPU::Recompiler::FUNCTION_ALIGNMENT);

  // 4 movs plus a jump
  if (TRAMPOLINE_AREA_SIZE - offset < 20)
@@ -387,6 +388,17 @@ u8* armGetJumpTrampoline(const void* target)
  return start;
}

void armAlignCode(vixl::aarch64::Assembler* armAsm, size_t alignment)
{
  size_t addr = armAsm->GetCursorAddress<size_t>();
  const size_t end_addr = Common::AlignUpPow2(addr, alignment);
  while (addr != end_addr)
  {
    armAsm->nop();
    addr += vixl::aarch64::kInstructionSize;
  }
}

void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
@@ -434,7 +446,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
  using namespace vixl::aarch64;

  Assembler actual_asm(static_cast<u8*>(code), code_size);
  Assembler* armAsm = &actual_asm;
  Assembler* RESTRICT armAsm = &actual_asm;

#ifdef VIXL_DEBUG
  vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
@@ -455,21 +467,19 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
  }

  // check events then for frame done
  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
  g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
  {
    Label skip_event_check;
    armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
    armAsm->ldr(RWARG2, PTR(&g_state.downcount));
    armAsm->cmp(RWARG1, RWARG2);
    armAsm->b(&skip_event_check, lt);
    armAsm->b(&dispatch, lt);

    g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
    armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);

    armAsm->bind(&skip_event_check);
  }

  // TODO: align?
  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
  g_dispatcher = armAsm->GetCursorAddress<const void*>();
  {
    armAsm->bind(&dispatch);
@@ -486,6 +496,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    armAsm->br(RXARG1);
  }

  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
  g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
  {
    armAsm->ldr(RWARG1, PTR(&g_state.pc));
@@ -493,6 +504,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    armAsm->b(&dispatch);
  }

  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
  g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
  {
    armAsm->ldr(RWARG1, PTR(&g_state.pc));
@@ -500,6 +512,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    armAsm->b(&dispatch);
  }

  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
  g_interpret_block = armAsm->GetCursorAddress<const void*>();
  {
    armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
@@ -508,7 +521,6 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)

  armAsm->FinalizeCode();

  // TODO: align?
  s_trampoline_targets.clear();
  s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
  s_trampoline_used = 0;
@@ -516,6 +528,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
  return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
}

void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
{
  constexpr u8 padding_value = 0x00;
  std::memset(dst, padding_value, size);
}

CPU::ARM64Recompiler::ARM64Recompiler() : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
{
}
@@ -1174,7 +1192,8 @@ void CPU::ARM64Recompiler::Flush(u32 flags)

void CPU::ARM64Recompiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
              inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);

@@ -317,6 +317,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
  return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
}

void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
{
  constexpr u8 padding_value = 0x00;
  std::memset(dst, padding_value, size);
}

u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
{
  // TODO: get rid of assembler construction here
@@ -998,7 +1004,8 @@ void CPU::RISCV64Recompiler::Flush(u32 flags)

void CPU::RISCV64Recompiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
              inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);

@@ -36,6 +36,7 @@ LOG_CHANNEL(Recompiler);
// PGXP TODO: LWL etc, MFC0
// PGXP TODO: Spyro 1 level gates have issues.

static constexpr u32 FUNCTION_ALIGNMENT = 16;
static constexpr u32 BACKPATCH_JMP_SIZE = 5;

static bool IsCallerSavedRegister(u32 id);
@@ -134,20 +135,18 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
  }

  // check events then for frame done
  cg->align(FUNCTION_ALIGNMENT);
  g_check_events_and_dispatch = cg->getCurr();
  {
    Label skip_event_check;
    cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
    cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
    cg->jl(skip_event_check);
    cg->jl(dispatch);

    g_run_events_and_dispatch = cg->getCurr();
    cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));

    cg->L(skip_event_check);
  }

  // TODO: align?
  cg->align(FUNCTION_ALIGNMENT);
  g_dispatcher = cg->getCurr();
  {
    cg->L(dispatch);
@@ -164,6 +163,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
  }

  cg->align(FUNCTION_ALIGNMENT);
  g_compile_or_revalidate_block = cg->getCurr();
  {
    cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
@@ -171,6 +171,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    cg->jmp(dispatch);
  }

  cg->align(FUNCTION_ALIGNMENT);
  g_discard_and_recompile_block = cg->getCurr();
  {
    cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
@@ -178,6 +179,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    cg->jmp(dispatch);
  }

  cg->align(FUNCTION_ALIGNMENT);
  g_interpret_block = cg->getCurr();
  {
    cg->call(CodeCache::GetInterpretUncachedBlockFunction());
@@ -201,6 +203,32 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
  return 5;
}

void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
{
  // Copied from Xbyak nop(), to avoid constructing a CodeGenerator.
  static const uint8_t nopTbl[9][9] = {
    {0x90},
    {0x66, 0x90},
    {0x0F, 0x1F, 0x00},
    {0x0F, 0x1F, 0x40, 0x00},
    {0x0F, 0x1F, 0x44, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  };
  const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
  u8* dst_ptr = static_cast<u8*>(dst);
  while (size > 0)
  {
    size_t len = (std::min)(n, size);
    const uint8_t* seq = nopTbl[len - 1];
    std::memcpy(dst_ptr, seq, len);
    dst_ptr += len;
    size -= len;
  }
}

#ifdef ENABLE_HOST_DISASSEMBLY

static ZydisFormatterFunc s_old_print_address;
@@ -929,7 +957,8 @@ void CPU::X64Recompiler::Flush(u32 flags)

void CPU::X64Recompiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
              inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);