CPU/Recompiler/AArch32: Load membase on demand
This commit is contained in:
parent
cce1ec598c
commit
52e0d8d473
|
@ -382,7 +382,6 @@ void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional<u32>& new
|
||||||
DebugAssert(!m_dirty_pc);
|
DebugAssert(!m_dirty_pc);
|
||||||
|
|
||||||
// TODO: try extracting this to a function
|
// TODO: try extracting this to a function
|
||||||
// TODO: move the cycle flush in here..
|
|
||||||
|
|
||||||
// save cycles for event test
|
// save cycles for event test
|
||||||
const TickCount cycles = std::exchange(m_cycles, 0);
|
const TickCount cycles = std::exchange(m_cycles, 0);
|
||||||
|
@ -621,7 +620,12 @@ void CPU::NewRec::AArch64Compiler::Flush(u32 flags)
|
||||||
if (flags & FLUSH_INSTRUCTION_BITS)
|
if (flags & FLUSH_INSTRUCTION_BITS)
|
||||||
{
|
{
|
||||||
// This sucks, but it's only used for fallbacks.
|
// This sucks, but it's only used for fallbacks.
|
||||||
Panic("Not implemented");
|
EmitMov(RWARG1, inst->bits);
|
||||||
|
EmitMov(RWARG2, m_current_instruction_pc);
|
||||||
|
EmitMov(RWARG3, m_current_instruction_branch_delay_slot);
|
||||||
|
armAsm->str(RWARG1, PTR(&g_state.current_instruction.bits));
|
||||||
|
armAsm->str(RWARG2, PTR(&g_state.current_instruction_pc));
|
||||||
|
armAsm->strb(RWARG3, PTR(&g_state.current_instruction_in_branch_delay_slot));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
|
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
|
||||||
|
@ -699,26 +703,23 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
|
||||||
{
|
{
|
||||||
Flush(FLUSH_FOR_INTERPRETER);
|
Flush(FLUSH_FOR_INTERPRETER);
|
||||||
|
|
||||||
#if 0
|
EmitCall(armAsm, &CPU::Recompiler::Thunks::InterpretInstruction);
|
||||||
cg->call(&CPU::Recompiler::Thunks::InterpretInstruction);
|
|
||||||
|
|
||||||
// TODO: make me less garbage
|
// TODO: make me less garbage
|
||||||
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
|
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
|
||||||
// but nothing should be going through here..
|
// but nothing should be going through here..
|
||||||
Label no_load_delay;
|
Label no_load_delay;
|
||||||
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
|
armAsm->ldrb(RWARG1, PTR(&g_state.next_load_delay_reg));
|
||||||
cg->cmp(RWARG1, static_cast<u8>(Reg::count));
|
armAsm->cmp(RWARG1, static_cast<u8>(Reg::count));
|
||||||
cg->je(no_load_delay, CodeGenerator::T_SHORT);
|
armAsm->b(&no_load_delay, eq);
|
||||||
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
|
armAsm->ldr(RWARG2, PTR(&g_state.next_load_delay_value));
|
||||||
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
|
armAsm->strb(RWARG1, PTR(&g_state.load_delay_reg));
|
||||||
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
|
armAsm->str(RWARG2, PTR(&g_state.load_delay_value));
|
||||||
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
|
EmitMov(RWARG1, static_cast<u32>(Reg::count));
|
||||||
cg->L(no_load_delay);
|
armAsm->strb(RWARG1, PTR(&g_state.next_load_delay_reg));
|
||||||
|
armAsm->bind(&no_load_delay);
|
||||||
|
|
||||||
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
|
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
|
||||||
#else
|
|
||||||
Panic("Fixme");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
|
void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
|
||||||
|
|
|
@ -122,6 +122,7 @@ public:
|
||||||
const Value& address, RegSize size, const Value& value);
|
const Value& address, RegSize size, const Value& value);
|
||||||
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||||
const Value& address, RegSize size, const Value& value, bool in_far_code);
|
const Value& address, RegSize size, const Value& value, bool in_far_code);
|
||||||
|
void EnsureMembaseLoaded();
|
||||||
void EmitUpdateFastmemBase();
|
void EmitUpdateFastmemBase();
|
||||||
|
|
||||||
// Unconditional branch to pointer. May allocate a scratch register.
|
// Unconditional branch to pointer. May allocate a scratch register.
|
||||||
|
@ -291,6 +292,7 @@ private:
|
||||||
bool m_load_delay_dirty = false;
|
bool m_load_delay_dirty = false;
|
||||||
bool m_next_load_delay_dirty = false;
|
bool m_next_load_delay_dirty = false;
|
||||||
bool m_gte_busy_cycles_dirty = false;
|
bool m_gte_busy_cycles_dirty = false;
|
||||||
|
bool m_membase_loaded = false;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Speculative Constants
|
// Speculative Constants
|
||||||
|
|
|
@ -224,13 +224,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||||
#undef RARG3
|
#undef RARG3
|
||||||
#undef RARG4
|
#undef RARG4
|
||||||
#undef RSCRATCH
|
#undef RSCRATCH
|
||||||
#undef RMEMBASE
|
|
||||||
#undef RSTATE
|
#undef RSTATE
|
||||||
|
|
||||||
namespace CPU::Recompiler {
|
namespace CPU::Recompiler {
|
||||||
|
|
||||||
constexpr HostReg RCPUPTR = 4;
|
constexpr HostReg RCPUPTR = 4;
|
||||||
constexpr HostReg RMEMBASEPTR = 5;
|
constexpr HostReg RMEMBASEPTR = 3;
|
||||||
constexpr HostReg RRETURN = 0;
|
constexpr HostReg RRETURN = 0;
|
||||||
constexpr HostReg RARG1 = 0;
|
constexpr HostReg RARG1 = 0;
|
||||||
constexpr HostReg RARG2 = 1;
|
constexpr HostReg RARG2 = 1;
|
||||||
|
@ -385,14 +384,6 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
||||||
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
||||||
DebugAssert(cpu_reg_allocated);
|
DebugAssert(cpu_reg_allocated);
|
||||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||||
|
|
||||||
// If there's loadstore instructions, preload the fastmem base.
|
|
||||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
|
||||||
{
|
|
||||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
|
||||||
Assert(fastmem_reg_allocated);
|
|
||||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -400,9 +391,6 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* j
|
||||||
{
|
{
|
||||||
if (free_registers)
|
if (free_registers)
|
||||||
{
|
{
|
||||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
|
||||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
|
||||||
|
|
||||||
m_register_cache.FreeHostReg(RCPUPTR);
|
m_register_cache.FreeHostReg(RCPUPTR);
|
||||||
m_register_cache.FreeHostReg(14);
|
m_register_cache.FreeHostReg(14);
|
||||||
m_register_cache.PopCalleeSavedRegisters(true);
|
m_register_cache.PopCalleeSavedRegisters(true);
|
||||||
|
@ -1058,6 +1046,7 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
|
||||||
u32 CodeGenerator::PrepareStackForCall()
|
u32 CodeGenerator::PrepareStackForCall()
|
||||||
{
|
{
|
||||||
m_register_cache.PushCallerSavedRegisters();
|
m_register_cache.PushCallerSavedRegisters();
|
||||||
|
m_membase_loaded = false;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1351,13 +1340,24 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CodeGenerator::EnsureMembaseLoaded()
|
||||||
|
{
|
||||||
|
if (m_membase_loaded)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||||
|
m_membase_loaded = true;
|
||||||
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitUpdateFastmemBase()
|
void CodeGenerator::EmitUpdateFastmemBase()
|
||||||
{
|
{
|
||||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
m_membase_loaded = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
|
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
|
||||||
{
|
{
|
||||||
|
EnsureMembaseLoaded();
|
||||||
|
|
||||||
HostReg address_reg;
|
HostReg address_reg;
|
||||||
if (address.IsConstant())
|
if (address.IsConstant())
|
||||||
{
|
{
|
||||||
|
@ -1396,6 +1396,8 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
||||||
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||||
const Value& address, RegSize size, Value& result)
|
const Value& address, RegSize size, Value& result)
|
||||||
{
|
{
|
||||||
|
EnsureMembaseLoaded();
|
||||||
|
|
||||||
HostReg address_reg;
|
HostReg address_reg;
|
||||||
if (address.IsConstant())
|
if (address.IsConstant())
|
||||||
{
|
{
|
||||||
|
@ -1538,6 +1540,8 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
|
||||||
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||||
const Value& address, RegSize size, const Value& value)
|
const Value& address, RegSize size, const Value& value)
|
||||||
{
|
{
|
||||||
|
EnsureMembaseLoaded();
|
||||||
|
|
||||||
Value actual_value = GetValueInHostRegister(value);
|
Value actual_value = GetValueInHostRegister(value);
|
||||||
|
|
||||||
HostReg address_reg;
|
HostReg address_reg;
|
||||||
|
|
|
@ -84,7 +84,6 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
|
||||||
#define RARG4 vixl::aarch32::r3
|
#define RARG4 vixl::aarch32::r3
|
||||||
#define RSCRATCH vixl::aarch32::r12
|
#define RSCRATCH vixl::aarch32::r12
|
||||||
#define RSTATE vixl::aarch32::r4
|
#define RSTATE vixl::aarch32::r4
|
||||||
#define RMEMBASE vixl::aarch32::r5
|
|
||||||
|
|
||||||
s32 armGetPCDisplacement(const void* current, const void* target);
|
s32 armGetPCDisplacement(const void* current, const void* target);
|
||||||
bool armIsPCDisplacementInImmediateRange(s32 displacement);
|
bool armIsPCDisplacementInImmediateRange(s32 displacement);
|
||||||
|
|
Loading…
Reference in New Issue