JitArm64: Check fastmem instead of fastmem_arena

Preparation for the next commit.

JitArm64 has been conflating these two flags. Most of the code that has
been guarded by fastmem_arena checks in fact requires fastmem.

When we have fastmem_arena without fastmem, it would be possible to do
things a bit more efficiently than what this commit does, but it's
non-trivial and therefore I will leave it out of this PR. With this
commit, we effectively have the same behavior as before this PR - plus
the added ability to toggle fastmem with a cache clear.
This commit is contained in:
JosJuice 2023-10-01 21:57:28 +02:00
parent b3bfcc5d7f
commit 0606433404
8 changed files with 52 additions and 46 deletions

View File

@ -364,8 +364,8 @@ void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
auto& memory = m_system.GetMemory();
ARM64Reg WD = gpr.GetReg();
ARM64Reg XD = EncodeRegTo64(WD);
MOVP2R(MEM_REG, jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XD, jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XD, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), 32));
CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));

View File

@ -233,7 +233,7 @@ protected:
// Only emits fast access code. Must only be used if the guest address is known in advance
// and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
AlwaysUnsafe,
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
// Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem):
// Tries to run fast access code, and if that fails, uses backpatching to replace the code
// with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
// then branches to either the fast access code or the slow C++ code.
@ -259,10 +259,10 @@ protected:
//
// emitting_routine && mode == Auto: X2
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X3
// mode != AlwaysSafe && !jo.fastmem_arena: X2
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
// !emitting_routine && mode == Auto && jo.fastmem_arena: X30
// emitting_routine && mode != AlwaysSafe && !jo.fastmem: X3
// mode != AlwaysSafe && !jo.fastmem: X2
// !emitting_routine && mode != AlwaysSafe && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30
//
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
// may be clobbered if mode != AlwaysUnsafe.

View File

@ -74,7 +74,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
ARM64Reg memory_base = MEM_REG;
ARM64Reg memory_offset = addr;
if (!jo.fastmem_arena)
if (!jo.fastmem)
{
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
@ -158,7 +158,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
in_far_code = true;
SwitchToFarCode();
if (jo.fastmem_arena && !emitting_routine)
if (jo.fastmem && !emitting_routine)
{
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
fastmem_area->fastmem_code = fastmem_start;

View File

@ -28,7 +28,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
@ -124,7 +124,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -166,7 +166,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
}
@ -175,7 +175,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
ARM64Reg RS = gpr.R(value);
@ -272,7 +272,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
@ -335,7 +335,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
}
@ -519,7 +519,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
// MMU games make use of a >= d despite this being invalid according to the PEM.
@ -554,7 +554,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -567,7 +567,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
@ -582,7 +582,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1;
@ -615,7 +615,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
@ -623,7 +623,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
@ -818,12 +818,12 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
});
@ -892,7 +892,7 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W2)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W0,

View File

@ -79,7 +79,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
@ -168,7 +168,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck)
@ -194,7 +194,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
}
@ -279,7 +279,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1;
@ -372,7 +372,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (!jo.fastmem_arena)
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
@ -428,6 +428,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem_arena)
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
}

View File

@ -22,8 +22,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// If we have a fastmem arena, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !m_ppc_state.msr.DR);
// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
// X30 is LR
// X0 is the address
@ -44,7 +44,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
else if (!jo.fastmem)
{
gpr.Lock(ARM64Reg::W2);
}
@ -86,7 +86,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.fastmem_arena)
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
@ -136,7 +136,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
}
else if (!jo.fastmem_arena)
else if (!jo.fastmem)
{
gpr.Unlock(ARM64Reg::W2);
}
@ -147,8 +147,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
// If we have a fastmem arena, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem_arena && !m_ppc_state.msr.DR);
// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
// X30 is LR
// X0 contains the scale
@ -199,9 +199,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Lock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3);
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
@ -241,7 +241,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem_arena)
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
@ -275,9 +275,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem_arena)
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Unlock(ARM64Reg::W2);
if (!js.assumeNoPairedQuantize && !jo.fastmem_arena)
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
if (!js.assumeNoPairedQuantize)
fpr.Unlock(ARM64Reg::Q1);

View File

@ -724,7 +724,7 @@ void JitArm64::GenerateQuantizedStores()
// X0 is the scale
// X1 is the address
// X2 is a temporary
// X3 is a temporary if jo.fastmem_arena is false (used in EmitBackpatchRoutine)
// X3 is a temporary if jo.fastmem is false (used in EmitBackpatchRoutine)
// X30 is LR
// Q0 is the register
// Q1 is a temporary
@ -733,7 +733,7 @@ void JitArm64::GenerateQuantizedStores()
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 2};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
if (!jo.fastmem_arena)
if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
ARM64FloatEmitter float_emit(this);

View File

@ -105,15 +105,21 @@ void JitInterface::UpdateMembase()
auto& ppc_state = m_system.GetPPCState();
auto& memory = m_system.GetMemory();
#ifdef _M_ARM_64
// JitArm64 is currently using the no fastmem arena code path even when only fastmem is off.
const bool fastmem_arena = m_jit->jo.fastmem;
#else
const bool fastmem_arena = m_jit->jo.fastmem_arena;
#endif
if (ppc_state.msr.DR)
{
ppc_state.mem_ptr =
m_jit->jo.fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase();
fastmem_arena ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase();
}
else
{
ppc_state.mem_ptr =
m_jit->jo.fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase();
fastmem_arena ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase();
}
}