CPU/Recompiler/AArch32: Load membase on demand
This commit is contained in:
parent
cce1ec598c
commit
52e0d8d473
|
@ -382,7 +382,6 @@ void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional<u32>& new
|
|||
DebugAssert(!m_dirty_pc);
|
||||
|
||||
// TODO: try extracting this to a function
|
||||
// TODO: move the cycle flush in here..
|
||||
|
||||
// save cycles for event test
|
||||
const TickCount cycles = std::exchange(m_cycles, 0);
|
||||
|
@ -621,7 +620,12 @@ void CPU::NewRec::AArch64Compiler::Flush(u32 flags)
|
|||
if (flags & FLUSH_INSTRUCTION_BITS)
|
||||
{
|
||||
// This sucks, but it's only used for fallbacks.
|
||||
Panic("Not implemented");
|
||||
EmitMov(RWARG1, inst->bits);
|
||||
EmitMov(RWARG2, m_current_instruction_pc);
|
||||
EmitMov(RWARG3, m_current_instruction_branch_delay_slot);
|
||||
armAsm->str(RWARG1, PTR(&g_state.current_instruction.bits));
|
||||
armAsm->str(RWARG2, PTR(&g_state.current_instruction_pc));
|
||||
armAsm->strb(RWARG3, PTR(&g_state.current_instruction_in_branch_delay_slot));
|
||||
}
|
||||
|
||||
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
|
||||
|
@ -699,26 +703,23 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
|
|||
{
|
||||
Flush(FLUSH_FOR_INTERPRETER);
|
||||
|
||||
#if 0
|
||||
cg->call(&CPU::Recompiler::Thunks::InterpretInstruction);
|
||||
EmitCall(armAsm, &CPU::Recompiler::Thunks::InterpretInstruction);
|
||||
|
||||
// TODO: make me less garbage
|
||||
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
|
||||
// but nothing should be going through here..
|
||||
Label no_load_delay;
|
||||
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
|
||||
cg->cmp(RWARG1, static_cast<u8>(Reg::count));
|
||||
cg->je(no_load_delay, CodeGenerator::T_SHORT);
|
||||
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
|
||||
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
|
||||
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
|
||||
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
|
||||
cg->L(no_load_delay);
|
||||
armAsm->ldrb(RWARG1, PTR(&g_state.next_load_delay_reg));
|
||||
armAsm->cmp(RWARG1, static_cast<u8>(Reg::count));
|
||||
armAsm->b(&no_load_delay, eq);
|
||||
armAsm->ldr(RWARG2, PTR(&g_state.next_load_delay_value));
|
||||
armAsm->strb(RWARG1, PTR(&g_state.load_delay_reg));
|
||||
armAsm->str(RWARG2, PTR(&g_state.load_delay_value));
|
||||
EmitMov(RWARG1, static_cast<u32>(Reg::count));
|
||||
armAsm->strb(RWARG1, PTR(&g_state.next_load_delay_reg));
|
||||
armAsm->bind(&no_load_delay);
|
||||
|
||||
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
|
||||
#else
|
||||
Panic("Fixme");
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)
|
||||
|
|
|
@ -122,6 +122,7 @@ public:
|
|||
const Value& address, RegSize size, const Value& value);
|
||||
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value, bool in_far_code);
|
||||
void EnsureMembaseLoaded();
|
||||
void EmitUpdateFastmemBase();
|
||||
|
||||
// Unconditional branch to pointer. May allocate a scratch register.
|
||||
|
@ -291,6 +292,7 @@ private:
|
|||
bool m_load_delay_dirty = false;
|
||||
bool m_next_load_delay_dirty = false;
|
||||
bool m_gte_busy_cycles_dirty = false;
|
||||
bool m_membase_loaded = false;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Speculative Constants
|
||||
|
|
|
@ -224,13 +224,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
|||
#undef RARG3
|
||||
#undef RARG4
|
||||
#undef RSCRATCH
|
||||
#undef RMEMBASE
|
||||
#undef RSTATE
|
||||
|
||||
namespace CPU::Recompiler {
|
||||
|
||||
constexpr HostReg RCPUPTR = 4;
|
||||
constexpr HostReg RMEMBASEPTR = 5;
|
||||
constexpr HostReg RMEMBASEPTR = 3;
|
||||
constexpr HostReg RRETURN = 0;
|
||||
constexpr HostReg RARG1 = 0;
|
||||
constexpr HostReg RARG2 = 1;
|
||||
|
@ -385,14 +384,6 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
|
|||
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
|
||||
DebugAssert(cpu_reg_allocated);
|
||||
UNREFERENCED_VARIABLE(cpu_reg_allocated);
|
||||
|
||||
// If there's loadstore instructions, preload the fastmem base.
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
{
|
||||
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
|
||||
Assert(fastmem_reg_allocated);
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,9 +391,6 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* j
|
|||
{
|
||||
if (free_registers)
|
||||
{
|
||||
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
|
||||
m_register_cache.FreeHostReg(RMEMBASEPTR);
|
||||
|
||||
m_register_cache.FreeHostReg(RCPUPTR);
|
||||
m_register_cache.FreeHostReg(14);
|
||||
m_register_cache.PopCalleeSavedRegisters(true);
|
||||
|
@ -1058,6 +1046,7 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
|
|||
// Prepares for an emitted call: asks the register cache to push the
// caller-saved registers, then clears m_membase_loaded so that the next
// EnsureMembaseLoaded() emits a fresh load of the fastmem base instead of
// reusing the register contents. Always returns 0.
// NOTE(review): presumably the flag is cleared because the callee may clobber
// the fastmem base register - confirm against the register assignments.
u32 CodeGenerator::PrepareStackForCall()
|
||||
{
|
||||
m_register_cache.PushCallerSavedRegisters();
|
||||
// Force a reload of the fastmem base on the next fastmem access.
m_membase_loaded = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1351,13 +1340,24 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
|
|||
}
|
||||
}
|
||||
|
||||
// Emits a load of State::fastmem_base (addressed relative to the CPU state
// pointer register) into the fastmem base register, unless m_membase_loaded
// says it has already been emitted. Sets m_membase_loaded afterwards so that
// repeated calls emit nothing until the flag is cleared again.
void CodeGenerator::EnsureMembaseLoaded()
|
||||
{
|
||||
// Already loaded: nothing to emit.
if (m_membase_loaded)
|
||||
return;
|
||||
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
m_membase_loaded = true;
|
||||
}
|
||||
|
||||
// As shown here: emits a load of State::fastmem_base into the fastmem base
// register and then clears m_membase_loaded, which makes the next
// EnsureMembaseLoaded() emit the load again.
// NOTE(review): this text comes from a diff rendering whose +/- markers were
// lost; the Ldr line and the flag clear may be the removed and added sides of
// the same change rather than consecutive statements - confirm against the
// repository before relying on this body.
void CodeGenerator::EmitUpdateFastmemBase()
|
||||
{
|
||||
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
|
||||
m_membase_loaded = false;
|
||||
}
|
||||
|
||||
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
EnsureMembaseLoaded();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
|
@ -1396,6 +1396,8 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
|
|||
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, Value& result)
|
||||
{
|
||||
EnsureMembaseLoaded();
|
||||
|
||||
HostReg address_reg;
|
||||
if (address.IsConstant())
|
||||
{
|
||||
|
@ -1538,6 +1540,8 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
|
|||
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
|
||||
const Value& address, RegSize size, const Value& value)
|
||||
{
|
||||
EnsureMembaseLoaded();
|
||||
|
||||
Value actual_value = GetValueInHostRegister(value);
|
||||
|
||||
HostReg address_reg;
|
||||
|
|
|
@ -84,7 +84,6 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
|
|||
#define RARG4 vixl::aarch32::r3
|
||||
#define RSCRATCH vixl::aarch32::r12
|
||||
#define RSTATE vixl::aarch32::r4
|
||||
#define RMEMBASE vixl::aarch32::r5
|
||||
|
||||
s32 armGetPCDisplacement(const void* current, const void* target);
|
||||
bool armIsPCDisplacementInImmediateRange(s32 displacement);
|
||||
|
|
Loading…
Reference in New Issue