CPU/Recompiler/AArch32: Load membase on demand

This commit is contained in:
Stenzek 2023-10-19 21:53:57 +10:00
parent cce1ec598c
commit 52e0d8d473
No known key found for this signature in database
4 changed files with 36 additions and 30 deletions

View File

@ -382,7 +382,6 @@ void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional<u32>& new
DebugAssert(!m_dirty_pc); DebugAssert(!m_dirty_pc);
// TODO: try extracting this to a function // TODO: try extracting this to a function
// TODO: move the cycle flush in here..
// save cycles for event test // save cycles for event test
const TickCount cycles = std::exchange(m_cycles, 0); const TickCount cycles = std::exchange(m_cycles, 0);
@ -621,7 +620,12 @@ void CPU::NewRec::AArch64Compiler::Flush(u32 flags)
if (flags & FLUSH_INSTRUCTION_BITS) if (flags & FLUSH_INSTRUCTION_BITS)
{ {
// This sucks, but it's only used for fallbacks. // This sucks, but it's only used for fallbacks.
Panic("Not implemented"); EmitMov(RWARG1, inst->bits);
EmitMov(RWARG2, m_current_instruction_pc);
EmitMov(RWARG3, m_current_instruction_branch_delay_slot);
armAsm->str(RWARG1, PTR(&g_state.current_instruction.bits));
armAsm->str(RWARG2, PTR(&g_state.current_instruction_pc));
armAsm->strb(RWARG3, PTR(&g_state.current_instruction_in_branch_delay_slot));
} }
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty) if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
@ -699,26 +703,23 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback()
{ {
Flush(FLUSH_FOR_INTERPRETER); Flush(FLUSH_FOR_INTERPRETER);
#if 0 EmitCall(armAsm, &CPU::Recompiler::Thunks::InterpretInstruction);
cg->call(&CPU::Recompiler::Thunks::InterpretInstruction);
// TODO: make me less garbage // TODO: make me less garbage
// TODO: this is wrong, it flushes the load delay on the same cycle when we return. // TODO: this is wrong, it flushes the load delay on the same cycle when we return.
// but nothing should be going through here.. // but nothing should be going through here..
Label no_load_delay; Label no_load_delay;
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]); armAsm->ldrb(RWARG1, PTR(&g_state.next_load_delay_reg));
cg->cmp(RWARG1, static_cast<u8>(Reg::count)); armAsm->cmp(RWARG1, static_cast<u8>(Reg::count));
cg->je(no_load_delay, CodeGenerator::T_SHORT); armAsm->b(&no_load_delay, eq);
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]); armAsm->ldr(RWARG2, PTR(&g_state.next_load_delay_value));
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1); armAsm->strb(RWARG1, PTR(&g_state.load_delay_reg));
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2); armAsm->str(RWARG2, PTR(&g_state.load_delay_value));
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count)); EmitMov(RWARG1, static_cast<u32>(Reg::count));
cg->L(no_load_delay); armAsm->strb(RWARG1, PTR(&g_state.next_load_delay_reg));
armAsm->bind(&no_load_delay);
m_load_delay_dirty = EMULATE_LOAD_DELAYS; m_load_delay_dirty = EMULATE_LOAD_DELAYS;
#else
Panic("Fixme");
#endif
} }
void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg) void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::WRegister& pcreg)

View File

@ -122,6 +122,7 @@ public:
const Value& address, RegSize size, const Value& value); const Value& address, RegSize size, const Value& value);
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value, bool in_far_code); const Value& address, RegSize size, const Value& value, bool in_far_code);
void EnsureMembaseLoaded();
void EmitUpdateFastmemBase(); void EmitUpdateFastmemBase();
// Unconditional branch to pointer. May allocate a scratch register. // Unconditional branch to pointer. May allocate a scratch register.
@ -291,6 +292,7 @@ private:
bool m_load_delay_dirty = false; bool m_load_delay_dirty = false;
bool m_next_load_delay_dirty = false; bool m_next_load_delay_dirty = false;
bool m_gte_busy_cycles_dirty = false; bool m_gte_busy_cycles_dirty = false;
bool m_membase_loaded = false;
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Speculative Constants // Speculative Constants

View File

@ -224,13 +224,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
#undef RARG3 #undef RARG3
#undef RARG4 #undef RARG4
#undef RSCRATCH #undef RSCRATCH
#undef RMEMBASE
#undef RSTATE #undef RSTATE
namespace CPU::Recompiler { namespace CPU::Recompiler {
constexpr HostReg RCPUPTR = 4; constexpr HostReg RCPUPTR = 4;
constexpr HostReg RMEMBASEPTR = 5; constexpr HostReg RMEMBASEPTR = 3;
constexpr HostReg RRETURN = 0; constexpr HostReg RRETURN = 0;
constexpr HostReg RARG1 = 0; constexpr HostReg RARG1 = 0;
constexpr HostReg RARG2 = 1; constexpr HostReg RARG2 = 1;
@ -385,14 +384,6 @@ void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
// m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state)); // m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state));
DebugAssert(cpu_reg_allocated); DebugAssert(cpu_reg_allocated);
UNREFERENCED_VARIABLE(cpu_reg_allocated); UNREFERENCED_VARIABLE(cpu_reg_allocated);
// If there's loadstore instructions, preload the fastmem base.
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
{
const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
Assert(fastmem_reg_allocated);
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
}
} }
} }
@ -400,9 +391,6 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* j
{ {
if (free_registers) if (free_registers)
{ {
if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
m_register_cache.FreeHostReg(RMEMBASEPTR);
m_register_cache.FreeHostReg(RCPUPTR); m_register_cache.FreeHostReg(RCPUPTR);
m_register_cache.FreeHostReg(14); m_register_cache.FreeHostReg(14);
m_register_cache.PopCalleeSavedRegisters(true); m_register_cache.PopCalleeSavedRegisters(true);
@ -1058,6 +1046,7 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond
u32 CodeGenerator::PrepareStackForCall() u32 CodeGenerator::PrepareStackForCall()
{ {
m_register_cache.PushCallerSavedRegisters(); m_register_cache.PushCallerSavedRegisters();
m_membase_loaded = false;
return 0; return 0;
} }
@ -1351,13 +1340,24 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
} }
} }
void CodeGenerator::EnsureMembaseLoaded()
{
if (m_membase_loaded)
return;
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base)));
m_membase_loaded = true;
}
void CodeGenerator::EmitUpdateFastmemBase() void CodeGenerator::EmitUpdateFastmemBase()
{ {
m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), offsetof(State, fastmem_base))); m_membase_loaded = false;
} }
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{ {
EnsureMembaseLoaded();
HostReg address_reg; HostReg address_reg;
if (address.IsConstant()) if (address.IsConstant())
{ {
@ -1396,6 +1396,8 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, Value& result) const Value& address, RegSize size, Value& result)
{ {
EnsureMembaseLoaded();
HostReg address_reg; HostReg address_reg;
if (address.IsConstant()) if (address.IsConstant())
{ {
@ -1538,6 +1540,8 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const Co
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value) const Value& address, RegSize size, const Value& value)
{ {
EnsureMembaseLoaded();
Value actual_value = GetValueInHostRegister(value); Value actual_value = GetValueInHostRegister(value);
HostReg address_reg; HostReg address_reg;

View File

@ -84,7 +84,6 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
#define RARG4 vixl::aarch32::r3 #define RARG4 vixl::aarch32::r3
#define RSCRATCH vixl::aarch32::r12 #define RSCRATCH vixl::aarch32::r12
#define RSTATE vixl::aarch32::r4 #define RSTATE vixl::aarch32::r4
#define RMEMBASE vixl::aarch32::r5
s32 armGetPCDisplacement(const void* current, const void* target); s32 armGetPCDisplacement(const void* current, const void* target);
bool armIsPCDisplacementInImmediateRange(s32 displacement); bool armIsPCDisplacementInImmediateRange(s32 displacement);