CPU/Recompiler: Align dispatchers and JIT blocks

A couple of percent difference if we're lucky. Practically probably <1%.
Stenzek 2024-12-29 18:11:39 +10:00
parent 82a843c121
commit 242561debf
8 changed files with 127 additions and 55 deletions

View File

@@ -1492,18 +1492,15 @@ void CPU::CodeCache::CommitFarCode(u32 length)
 void CPU::CodeCache::AlignCode(u32 alignment)
 {
-#if defined(CPU_ARCH_X64)
-  constexpr u8 padding_value = 0xcc; // int3
-#else
-  constexpr u8 padding_value = 0x00;
-#endif
   DebugAssert(Common::IsPow2(alignment));
   const u32 num_padding_bytes =
     std::min(static_cast<u32>(Common::AlignUpPow2(reinterpret_cast<uintptr_t>(s_free_code_ptr), alignment) -
                               reinterpret_cast<uintptr_t>(s_free_code_ptr)),
              GetFreeCodeSpace());
-  std::memset(s_free_code_ptr, padding_value, num_padding_bytes);
+  if (num_padding_bytes > 0)
+    EmitAlignmentPadding(s_free_code_ptr, num_padding_bytes);
   s_free_code_ptr += num_padding_bytes;
   s_code_used += num_padding_bytes;
 }
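
For reference, a standalone sketch of the padding computation AlignCode performs before handing off to EmitAlignmentPadding. The cursor value and free-space figure below are made up for illustration (assuming a 64-bit host), and AlignUpPow2 is a local stand-in for Common::AlignUpPow2:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for Common::AlignUpPow2().
constexpr std::uintptr_t AlignUpPow2(std::uintptr_t value, std::uintptr_t alignment)
{
  return (value + (alignment - 1)) & ~(alignment - 1);
}

int main()
{
  const std::uintptr_t free_code_ptr = 0x7f0000001234; // assumed cursor into the code cache
  const std::uint32_t free_space = 64 * 1024;          // assumed space left in the cache
  const std::uint32_t alignment = 16;

  // Same expression as AlignCode(): bytes to the next boundary, clamped to the space left.
  const std::uint32_t num_padding_bytes =
    std::min(static_cast<std::uint32_t>(AlignUpPow2(free_code_ptr, alignment) - free_code_ptr), free_space);
  std::printf("padding bytes: %u\n", num_padding_bytes); // 0x...1234 -> 12 bytes to reach 0x...1240
  return 0;
}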

View File

@@ -247,6 +247,7 @@ bool HasPreviouslyFaultedOnPC(u32 guest_pc);
 u32 EmitASMFunctions(void* code, u32 code_size);
 u32 EmitJump(void* code, const void* dst, bool flush_icache);
+void EmitAlignmentPadding(void* dst, size_t size);
 void DisassembleAndLogHostCode(const void* start, u32 size);
 u32 GetHostInstructionCount(const void* start, u32 size);

View File

@@ -103,8 +103,10 @@ void CPU::Recompiler::Recompiler::BeginBlock()
 const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u32* host_code_size,
                                                       u32* host_far_code_size)
 {
-  Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(),
-        CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace());
+  CodeCache::AlignCode(FUNCTION_ALIGNMENT);
+
+  Reset(block, CodeCache::GetFreeCodePointer(), CodeCache::GetFreeCodeSpace(), CodeCache::GetFreeFarCodePointer(),
+        CodeCache::GetFreeFarCodeSpace());
 
   DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);
@@ -144,8 +146,8 @@ const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u
   const void* code = EndCompile(&code_size, &far_code_size);
   *host_code_size = code_size;
   *host_far_code_size = far_code_size;
-  CPU::CodeCache::CommitCode(code_size);
-  CPU::CodeCache::CommitFarCode(far_code_size);
+  CodeCache::CommitCode(code_size);
+  CodeCache::CommitFarCode(far_code_size);
 
   return code;
 }
@@ -1158,7 +1160,8 @@ void CPU::Recompiler::Recompiler::RestoreHostState()
 }
 
 void CPU::Recompiler::Recompiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register,
-                                                   u32 data_register, MemoryAccessSize size, bool is_signed, bool is_load)
+                                                   u32 data_register, MemoryAccessSize size, bool is_signed,
+                                                   bool is_load)
 {
   DebugAssert(CodeCache::IsUsingFastmem());
   DebugAssert(address_register < NUM_HOST_REGS);

View File

@@ -34,6 +34,9 @@ public:
   static constexpr u32 NUM_HOST_REGS = 16;
   static constexpr bool HAS_MEMORY_OPERANDS = true;
 
+  // Align functions to 16 bytes.
+  static constexpr u32 FUNCTION_ALIGNMENT = 16;
+
 #elif defined(CPU_ARCH_ARM32)
 
   // A reasonable "maximum" number of bytes per instruction.
@@ -44,6 +47,9 @@ public:
   static constexpr u32 NUM_HOST_REGS = 16;
   static constexpr bool HAS_MEMORY_OPERANDS = false;
 
+  // Align functions to 4 bytes (word size).
+  static constexpr u32 FUNCTION_ALIGNMENT = 4;
+
 #elif defined(CPU_ARCH_ARM64)
 
   // A reasonable "maximum" number of bytes per instruction.
@@ -56,6 +62,9 @@ public:
   static constexpr u32 NUM_HOST_REGS = 32;
   static constexpr bool HAS_MEMORY_OPERANDS = false;
 
+  // Align functions to 16 bytes.
+  static constexpr u32 FUNCTION_ALIGNMENT = 16;
+
 #elif defined(CPU_ARCH_RISCV64)
 
   // Number of host registers.
@@ -68,6 +77,9 @@ public:
   static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64;
   static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512;
 
+  // Align functions to 16 bytes.
+  static constexpr u32 FUNCTION_ALIGNMENT = 16;
+
 #endif
 
 public:
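
Since AlignCode() asserts the alignment is a power of two and pads at most alignment - 1 bytes per block, the cost of these constants is bounded. A rough, illustrative calculation of the worst case (the average block size here is an assumption, not a measurement):

#include <cstdint>
#include <cstdio>

int main()
{
  constexpr std::uint32_t alignment = 16;        // FUNCTION_ALIGNMENT on x64/ARM64/RISCV64
  constexpr std::uint32_t avg_block_bytes = 256; // assumed average host code size per block
  // Worst case, every block pays alignment - 1 bytes of padding.
  const double overhead_pct = 100.0 * (alignment - 1) / (avg_block_bytes + alignment - 1);
  std::printf("worst-case code cache overhead: %.1f%%\n", overhead_pct); // ~5.5%
  return 0;
}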

View File

@@ -320,14 +320,17 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   armAsm->FinalizeCode();
 
-#if 0
-  // TODO: align?
   s_trampoline_targets.clear();
   s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
   s_trampoline_used = 0;
-#endif
 
-  return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/;
+  return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
+}
+
+void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
+{
+  constexpr u8 padding_value = 0x00;
+  std::memset(dst, padding_value, size);
 }
 
 CPU::ARM32Recompiler::ARM32Recompiler() : m_emitter(A32), m_far_emitter(A32)
@@ -1025,7 +1028,8 @@ void CPU::ARM32Recompiler::Flush(u32 flags)
 
 void CPU::ARM32Recompiler::Compile_Fallback()
 {
-  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
+  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
+              inst->bits);
 
   Flush(FLUSH_FOR_INTERPRETER);

View File

@@ -41,19 +41,20 @@ LOG_CHANNEL(Recompiler);
 #define RSTATE vixl::aarch64::x19
 #define RMEMBASE vixl::aarch64::x20
 
-bool armIsCallerSavedRegister(u32 id);
-s64 armGetPCDisplacement(const void* current, const void* target);
-bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
-void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
-void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
-void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
-void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
-void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
-void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
-                    bool sign_extend_word = false);
-void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
-                     const vixl::aarch64::Register& tempreg = RXSCRATCH);
-u8* armGetJumpTrampoline(const void* target);
+static bool armIsCallerSavedRegister(u32 id);
+static s64 armGetPCDisplacement(const void* current, const void* target);
+static bool armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr);
+static void armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr);
+static void armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm);
+static void armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
+static void armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline);
+static void armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond, const void* ptr);
+static void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
+                           bool sign_extend_word = false);
+static void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
+                            const vixl::aarch64::Register& tempreg = RXSCRATCH);
+static u8* armGetJumpTrampoline(const void* target);
+static void armAlignCode(vixl::aarch64::Assembler* armAsm, size_t alignment);
 
 static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
 static std::unordered_map<const void*, u32> s_trampoline_targets;
@@ -327,8 +328,8 @@ void armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Regis
   armAsm->ldr(reg, memop);
 }
 
-void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, const void* addr,
-                     const vixl::aarch64::Register& tempreg)
+[[maybe_unused]] void armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
+                                      const void* addr, const vixl::aarch64::Register& tempreg)
 {
   DebugAssert(tempreg.IsX());
@@ -359,7 +360,7 @@ u8* armGetJumpTrampoline(const void* target)
     return s_trampoline_start_ptr + it->second;
 
   // align to 16 bytes?
-  const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
+  const u32 offset = Common::AlignUpPow2(s_trampoline_used, CPU::Recompiler::FUNCTION_ALIGNMENT);
 
   // 4 movs plus a jump
   if (TRAMPOLINE_AREA_SIZE - offset < 20)
@@ -387,6 +388,17 @@ u8* armGetJumpTrampoline(const void* target)
   return start;
 }
 
+void armAlignCode(vixl::aarch64::Assembler* armAsm, size_t alignment)
+{
+  size_t addr = armAsm->GetCursorAddress<size_t>();
+  const size_t end_addr = Common::AlignUpPow2(addr, alignment);
+  while (addr != end_addr)
+  {
+    armAsm->nop();
+    addr += vixl::aarch64::kInstructionSize;
+  }
+}
+
 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
 {
 #ifdef ENABLE_HOST_DISASSEMBLY
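
A minimal sketch of the arithmetic armAlignCode performs above: with fixed 4-byte AArch64 instructions, padding the cursor to a 16-byte boundary emits between zero and three nops. The addresses below are illustrative, and AlignUpPow2 is a local stand-in for Common::AlignUpPow2:

#include <cstddef>
#include <cstdio>

// Illustrative stand-in for Common::AlignUpPow2().
constexpr std::size_t AlignUpPow2(std::size_t value, std::size_t alignment)
{
  return (value + (alignment - 1)) & ~(alignment - 1);
}

int main()
{
  constexpr std::size_t kInstructionSize = 4; // vixl::aarch64::kInstructionSize
  for (const std::size_t addr : {0x1000u, 0x1004u, 0x1008u, 0x100Cu})
  {
    // Same loop bound as armAlignCode(): one nop per instruction slot up to the boundary.
    const std::size_t nops = (AlignUpPow2(addr, 16) - addr) / kInstructionSize;
    std::printf("cursor 0x%zx -> %zu nop(s)\n", addr, nops);
  }
  return 0;
}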
@@ -434,7 +446,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   using namespace vixl::aarch64;
 
   Assembler actual_asm(static_cast<u8*>(code), code_size);
-  Assembler* armAsm = &actual_asm;
+  Assembler* RESTRICT armAsm = &actual_asm;
 
 #ifdef VIXL_DEBUG
   vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
@@ -455,21 +467,19 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   }
 
   // check events then for frame done
+  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
   g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
   {
-    Label skip_event_check;
     armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
     armAsm->ldr(RWARG2, PTR(&g_state.downcount));
     armAsm->cmp(RWARG1, RWARG2);
-    armAsm->b(&skip_event_check, lt);
+    armAsm->b(&dispatch, lt);
 
     g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
     armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
-    armAsm->bind(&skip_event_check);
   }
 
-  // TODO: align?
+  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
   g_dispatcher = armAsm->GetCursorAddress<const void*>();
   {
     armAsm->bind(&dispatch);
@@ -486,6 +496,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     armAsm->br(RXARG1);
   }
 
+  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
   g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
   {
     armAsm->ldr(RWARG1, PTR(&g_state.pc));
@@ -493,6 +504,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     armAsm->b(&dispatch);
   }
 
+  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
   g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
   {
     armAsm->ldr(RWARG1, PTR(&g_state.pc));
@@ -500,6 +512,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     armAsm->b(&dispatch);
   }
 
+  armAlignCode(armAsm, Recompiler::FUNCTION_ALIGNMENT);
   g_interpret_block = armAsm->GetCursorAddress<const void*>();
   {
     armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
@@ -508,7 +521,6 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   armAsm->FinalizeCode();
 
-  // TODO: align?
   s_trampoline_targets.clear();
   s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
   s_trampoline_used = 0;
@@ -516,6 +528,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
 }
 
+void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
+{
+  constexpr u8 padding_value = 0x00;
+  std::memset(dst, padding_value, size);
+}
+
 CPU::ARM64Recompiler::ARM64Recompiler() : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
 {
 }
@@ -1174,7 +1192,8 @@ void CPU::ARM64Recompiler::Flush(u32 flags)
 
 void CPU::ARM64Recompiler::Compile_Fallback()
 {
-  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
+  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
+              inst->bits);
 
   Flush(FLUSH_FOR_INTERPRETER);

View File

@@ -317,6 +317,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
 }
 
+void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
+{
+  constexpr u8 padding_value = 0x00;
+  std::memset(dst, padding_value, size);
+}
+
 u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
 {
   // TODO: get rid of assembler construction here
@@ -998,7 +1004,8 @@ void CPU::RISCV64Recompiler::Flush(u32 flags)
 
 void CPU::RISCV64Recompiler::Compile_Fallback()
 {
-  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
+  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
+              inst->bits);
 
   Flush(FLUSH_FOR_INTERPRETER);

View File

@@ -36,6 +36,7 @@ LOG_CHANNEL(Recompiler);
 // PGXP TODO: LWL etc, MFC0
 // PGXP TODO: Spyro 1 level gates have issues.
 
+static constexpr u32 FUNCTION_ALIGNMENT = 16;
 static constexpr u32 BACKPATCH_JMP_SIZE = 5;
 
 static bool IsCallerSavedRegister(u32 id);
@@ -134,20 +135,18 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
   }
 
   // check events then for frame done
+  cg->align(FUNCTION_ALIGNMENT);
   g_check_events_and_dispatch = cg->getCurr();
   {
-    Label skip_event_check;
     cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
     cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
-    cg->jl(skip_event_check);
+    cg->jl(dispatch);
 
     g_run_events_and_dispatch = cg->getCurr();
     cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
-    cg->L(skip_event_check);
   }
 
-  // TODO: align?
+  cg->align(FUNCTION_ALIGNMENT);
   g_dispatcher = cg->getCurr();
   {
     cg->L(dispatch);
@@ -164,6 +163,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
   }
 
+  cg->align(FUNCTION_ALIGNMENT);
   g_compile_or_revalidate_block = cg->getCurr();
   {
     cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
@@ -171,6 +171,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     cg->jmp(dispatch);
   }
 
+  cg->align(FUNCTION_ALIGNMENT);
   g_discard_and_recompile_block = cg->getCurr();
   {
     cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
@@ -178,6 +179,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     cg->jmp(dispatch);
   }
 
+  cg->align(FUNCTION_ALIGNMENT);
   g_interpret_block = cg->getCurr();
   {
     cg->call(CodeCache::GetInterpretUncachedBlockFunction());
@@ -201,6 +203,32 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
   return 5;
 }
 
+void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
+{
+  // Copied from Xbyak nop(), to avoid constructing a CodeGenerator.
+  static const uint8_t nopTbl[9][9] = {
+    {0x90},
+    {0x66, 0x90},
+    {0x0F, 0x1F, 0x00},
+    {0x0F, 0x1F, 0x40, 0x00},
+    {0x0F, 0x1F, 0x44, 0x00, 0x00},
+    {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+    {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+    {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+    {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+  };
+  const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]);
+  u8* dst_ptr = static_cast<u8*>(dst);
+  while (size > 0)
+  {
+    size_t len = (std::min)(n, size);
+    const uint8_t* seq = nopTbl[len - 1];
+    std::memcpy(dst_ptr, seq, len);
+    dst_ptr += len;
+    size -= len;
+  }
+}
+
 #ifdef ENABLE_HOST_DISASSEMBLY
 static ZydisFormatterFunc s_old_print_address;
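
The loop above greedily chunks the requested padding into the longest multi-byte NOP sequences in the table (at most 9 bytes each). A standalone sketch of just that chunking, with an illustrative request of 23 bytes:

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
  constexpr std::size_t max_nop_len = 9; // longest entry in the nop table
  std::size_t size = 23;                 // illustrative padding request

  // Mirrors the loop in EmitAlignmentPadding(): 23 bytes -> 9 + 9 + 5.
  while (size > 0)
  {
    const std::size_t len = std::min(max_nop_len, size);
    std::printf("emit %zu-byte nop\n", len);
    size -= len;
  }
  return 0;
}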
@@ -929,7 +957,8 @@ void CPU::X64Recompiler::Flush(u32 flags)
 
 void CPU::X64Recompiler::Compile_Fallback()
 {
-  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);
+  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
+              inst->bits);
 
   Flush(FLUSH_FOR_INTERPRETER);