PowerPC: Raise alignment exceptions in more situations

To avoid affecting performance, the JITs will in most situations not raise
alignment exceptions unless the new INI-only setting AlignmentExceptions
is enabled.
This commit is contained in:
JosJuice 2021-07-04 20:47:04 +02:00
parent 1211a6b62e
commit 7e9a49746a
15 changed files with 238 additions and 203 deletions

View File

@ -673,14 +673,19 @@ static constexpr u32 MaskImm26(s64 distance)
}
// FixupBranch branching
// Convenience overload: forwards to the two-argument SetJumpTarget, passing m_code
// as the target of the given fixup branch.
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch)
{
SetJumpTarget(branch, m_code);
}
void ARM64XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
{
if (!branch.ptr)
return;
bool Not = false;
u32 inst = 0;
s64 distance = (s64)(m_code - branch.ptr);
s64 distance = static_cast<s64>(target - branch.ptr);
distance >>= 2;
switch (branch.type)

View File

@ -695,7 +695,8 @@ public:
bool HasWriteFailed() const { return m_write_failed; }
// FixupBranch branching
void SetJumpTarget(FixupBranch const& branch);
void SetJumpTarget(const FixupBranch& branch);
void SetJumpTarget(const FixupBranch& branch, const u8* target);
[[nodiscard]] FixupBranch CBZ(ARM64Reg Rt);
[[nodiscard]] FixupBranch CBNZ(ARM64Reg Rt);
[[nodiscard]] FixupBranch B(CCFlags cond);

View File

@ -15,10 +15,18 @@ enum class ProgramExceptionCause : u32
Trap = 1 << (31 - 14),
};
// Flags a pending alignment exception on ppc_state and records the state describing it:
// SPR_DAR receives the effective address of the access, and SPR_DSISR receives a value
// assembled from fields of the faulting instruction (an opcode-derived value, RD and RA).
//
// NOTE(review): `x` below is true for OPCD >= 32, and in that case SUBOP10 is used as the
// opcode source. Elsewhere in this change the indexed encodings are matched with OPCD 31
// or OPCD 4 (both < 32) - confirm that the selection is not inverted, and verify the bit
// placement against the alignment-exception DSISR table in the PowerPC programming
// environments manual.
inline void GenerateAlignmentException(PowerPC::PowerPCState& ppc_state, u32 address)
inline void GenerateAlignmentException(PowerPC::PowerPCState& ppc_state, u32 effective_address,
UGeckoInstruction inst)
{
ppc_state.Exceptions |= EXCEPTION_ALIGNMENT;
ppc_state.spr[SPR_DAR] = address;
ppc_state.spr[SPR_DAR] = effective_address;
// It has not been hardware tested what gets used instead of RD and RA in
// the cases documented as undefined. For now, simply use RD and RA.
const bool x = inst.OPCD >= 32;
// op is the 10-bit extended opcode when x is set, otherwise the primary opcode with its
// lowest bit shifted out.
const u32 op = x ? inst.SUBOP10 : (inst.OPCD >> 1);
// Packing (counting from the least significant bit): RA at bit 0, RD at bit 5, the low
// five bits of op at bit 10, and whatever remains of op above bit 7 at bit 15.
const u32 dsisr = ((op >> 8) << 15) | ((op & 0b11111) << 10) | (inst.RD << 5) | (inst.RA);
ppc_state.spr[SPR_DSISR] = dsisr;
}
inline void GenerateDSIException(PowerPC::PowerPCState& ppc_state, u32 address)

View File

@ -64,13 +64,6 @@ void Interpreter::lfd(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u64 temp = interpreter.m_mmu.Read_U64(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -81,13 +74,6 @@ void Interpreter::lfdu(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_U(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u64 temp = interpreter.m_mmu.Read_U64(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -101,13 +87,6 @@ void Interpreter::lfdux(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_UX(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u64 temp = interpreter.m_mmu.Read_U64(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -121,13 +100,6 @@ void Interpreter::lfdx(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u64 temp = interpreter.m_mmu.Read_U64(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -138,13 +110,6 @@ void Interpreter::lfs(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u32 temp = interpreter.m_mmu.Read_U32(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -158,13 +123,6 @@ void Interpreter::lfsu(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_U(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u32 temp = interpreter.m_mmu.Read_U32(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -179,13 +137,6 @@ void Interpreter::lfsux(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_UX(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u32 temp = interpreter.m_mmu.Read_U32(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -200,13 +151,6 @@ void Interpreter::lfsx(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u32 temp = interpreter.m_mmu.Read_U32(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -270,9 +214,9 @@ void Interpreter::lmw(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0 || ppc_state.msr.LE)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, address);
GenerateAlignmentException(ppc_state, address, inst);
return;
}
@ -302,9 +246,9 @@ void Interpreter::stmw(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0 || ppc_state.msr.LE)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, address);
GenerateAlignmentException(ppc_state, address, inst);
return;
}
@ -368,12 +312,6 @@ void Interpreter::stfd(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst);
}
@ -382,12 +320,6 @@ void Interpreter::stfdu(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_U(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
{
@ -400,12 +332,6 @@ void Interpreter::stfs(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst);
}
@ -414,12 +340,6 @@ void Interpreter::stfsu(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_U(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
{
@ -541,7 +461,7 @@ void Interpreter::dcbz(Interpreter& interpreter, UGeckoInstruction inst)
if (!HID0(ppc_state).DCE)
{
GenerateAlignmentException(ppc_state, dcbz_addr);
GenerateAlignmentException(ppc_state, dcbz_addr, inst);
return;
}
@ -572,7 +492,7 @@ void Interpreter::dcbz_l(Interpreter& interpreter, UGeckoInstruction inst)
if (!HID0(ppc_state).DCE)
{
GenerateAlignmentException(ppc_state, address);
GenerateAlignmentException(ppc_state, address, inst);
return;
}
@ -592,13 +512,11 @@ void Interpreter::eciwx(Interpreter& interpreter, UGeckoInstruction inst)
return;
}
if ((EA & 0b11) != 0)
const u32 temp = interpreter.m_mmu.Read_U32(EA, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
{
GenerateAlignmentException(ppc_state, EA);
return;
ppc_state.gpr[inst.RD] = temp;
}
ppc_state.gpr[inst.RD] = interpreter.m_mmu.Read_U32(EA, inst);
}
void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst)
@ -612,12 +530,6 @@ void Interpreter::ecowx(Interpreter& interpreter, UGeckoInstruction inst)
return;
}
if ((EA & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, EA);
return;
}
interpreter.m_mmu.Write_U32(ppc_state.gpr[inst.RS], EA, inst);
}
@ -724,6 +636,7 @@ void Interpreter::lhzx(Interpreter& interpreter, UGeckoInstruction inst)
}
// FIXME: Should rollback if a DSI occurs
// TODO: Should this be able to cause alignment exceptions?
void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
@ -731,7 +644,7 @@ void Interpreter::lswx(Interpreter& interpreter, UGeckoInstruction inst)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, EA);
GenerateAlignmentException(ppc_state, EA, inst);
return;
}
@ -817,12 +730,6 @@ void Interpreter::stfdux(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_UX(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
{
@ -835,12 +742,6 @@ void Interpreter::stfdx(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U64(ppc_state.ps[inst.FS].PS0AsU64(), address, inst);
}
@ -850,12 +751,6 @@ void Interpreter::stfiwx(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U32(ppc_state.ps[inst.FS].PS0AsU32(), address, inst);
}
@ -864,12 +759,6 @@ void Interpreter::stfsux(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_UX(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
{
@ -882,12 +771,6 @@ void Interpreter::stfsx(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
interpreter.m_mmu.Write_U32(ConvertToSingle(ppc_state.ps[inst.FS].PS0AsU64()), address, inst);
}
@ -917,6 +800,7 @@ void Interpreter::sthx(Interpreter& interpreter, UGeckoInstruction inst)
// lswi - bizarro string instruction
// FIXME: Should rollback if a DSI occurs
// TODO: Should this be able to cause alignment exceptions?
void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
@ -926,7 +810,7 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, EA);
GenerateAlignmentException(ppc_state, EA, inst);
return;
}
@ -965,6 +849,7 @@ void Interpreter::lswi(Interpreter& interpreter, UGeckoInstruction inst)
// todo : optimize ?
// stswi - bizarro string instruction
// FIXME: Should rollback if a DSI occurs
// TODO: Should this be able to cause alignment exceptions?
void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
@ -974,7 +859,7 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, EA);
GenerateAlignmentException(ppc_state, EA, inst);
return;
}
@ -1006,6 +891,7 @@ void Interpreter::stswi(Interpreter& interpreter, UGeckoInstruction inst)
}
// TODO: is this right? is it DSI interruptible?
// TODO: Should this be able to cause alignment exceptions?
void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst)
{
auto& ppc_state = interpreter.m_ppc_state;
@ -1013,7 +899,7 @@ void Interpreter::stswx(Interpreter& interpreter, UGeckoInstruction inst)
if (ppc_state.msr.LE)
{
GenerateAlignmentException(ppc_state, EA);
GenerateAlignmentException(ppc_state, EA, inst);
return;
}
@ -1052,12 +938,6 @@ void Interpreter::lwarx(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
const u32 temp = interpreter.m_mmu.Read_U32(address, inst);
if (!(ppc_state.Exceptions & ANY_LOADSTORE_EXCEPTION))
@ -1074,12 +954,6 @@ void Interpreter::stwcxd(Interpreter& interpreter, UGeckoInstruction inst)
auto& ppc_state = interpreter.m_ppc_state;
const u32 address = Helper_Get_EA_X(ppc_state, inst);
if ((address & 0b11) != 0)
{
GenerateAlignmentException(ppc_state, address);
return;
}
if (ppc_state.reserve)
{
if (address == ppc_state.reserve_address)

View File

@ -460,7 +460,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
if (emit_fast_path)
{
// Perform lookup to see if we can use fast path.
// Perform BAT lookup to see if we can use fast path.
MOV(64, R(RSCRATCH2), ImmPtr(m_mmu.GetDBATTable().data()));
PUSH(RSCRATCH);
SHR(32, R(RSCRATCH), Imm8(PowerPC::BAT_INDEX_SHIFT));

View File

@ -102,7 +102,7 @@ FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp, const void*
return J_CC(CC_NC, m_far_code.Enabled() ? Jump::Near : Jump::Short);
}
FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
FixupBranch EmuCodeBlock::CheckIfBATSafeAddress(const OpArg& reg_value, X64Reg reg_addr,
BitSet32 registers_in_use)
{
registers_in_use[reg_addr] = true;
@ -118,7 +118,7 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
if (reg_addr != RSCRATCH_EXTRA)
MOV(32, R(RSCRATCH_EXTRA), R(reg_addr));
// Perform lookup to see if we can use fast path.
// Perform BAT lookup to see if we can use fast path.
MOV(64, R(RSCRATCH), ImmPtr(m_jit.m_mmu.GetDBATTable().data()));
SHR(32, R(RSCRATCH_EXTRA), Imm8(PowerPC::BAT_INDEX_SHIFT));
TEST(32, MComplex(RSCRATCH, RSCRATCH_EXTRA, SCALE_4, 0), Imm32(PowerPC::BAT_PHYSICAL_BIT));
@ -131,6 +131,13 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
return J_CC(CC_Z, m_far_code.Enabled() ? Jump::Near : Jump::Short);
}
// Emits a test of reg_addr against the alignment mask for access_size and returns the
// fixup for a conditional jump that is taken when any masked bit is set, i.e. when the
// address is not aligned. The jump is near when far code is enabled, short otherwise.
// The inst parameter is accepted but not consulted by this function.
FixupBranch EmuCodeBlock::CheckIfAlignmentSafeAddress(X64Reg reg_addr, int access_size,
                                                      UGeckoInstruction inst)
{
  const auto alignment_mask = PowerPC::GetAlignmentMask(access_size);
  TEST(32, R(reg_addr), Imm32(alignment_mask));

  const auto jump_kind = m_far_code.Enabled() ? Jump::Near : Jump::Short;
  return J_CC(CC_NZ, jump_kind);
}
void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
bool swap, MovInfo* info)
{
@ -321,11 +328,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
bool signExtend, int flags)
{
bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;
bool check_alignment = m_jit.jo.alignment_exceptions &&
PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize);
auto& js = m_jit.js;
registersInUse[reg_value] = false;
if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
!force_slow_access)
!force_slow_access && !check_alignment)
{
u8* backpatchStart = GetWritableCodePtr();
MovInfo mov;
@ -379,13 +388,21 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address)
{
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse);
FixupBranch slow_1;
if (check_alignment)
slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst);
FixupBranch slow_2 = CheckIfBATSafeAddress(R(reg_value), reg_addr, registersInUse);
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
if (m_far_code.Enabled())
SwitchToFarCode();
else
exit = J(Jump::Near);
SetJumpTarget(slow);
if (check_alignment)
SetJumpTarget(slow_1);
SetJumpTarget(slow_2);
}
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
@ -444,7 +461,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
bool signExtend)
{
// If the address is known to be RAM, just load it directly.
if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize))
if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize, inst))
{
UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
return;
@ -499,13 +516,15 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
{
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
bool force_slow_access = (flags & SAFE_LOADSTORE_FORCE_SLOW_ACCESS) != 0;
bool check_alignment = m_jit.jo.alignment_exceptions &&
PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, accessSize);
// set the correct immediate format
reg_value = FixImmediate(accessSize, reg_value);
auto& js = m_jit.js;
if (m_jit.jo.fastmem && !(flags & (SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_UPDATE_PC)) &&
!force_slow_access)
!force_slow_access && !check_alignment)
{
u8* backpatchStart = GetWritableCodePtr();
MovInfo mov;
@ -555,13 +574,21 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address)
{
FixupBranch slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse);
FixupBranch slow_1;
if (check_alignment)
slow_1 = CheckIfAlignmentSafeAddress(reg_addr, accessSize, inst);
FixupBranch slow_2 = CheckIfBATSafeAddress(reg_value, reg_addr, registersInUse);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (m_far_code.Enabled())
SwitchToFarCode();
else
exit = J(Jump::Near);
SetJumpTarget(slow);
if (check_alignment)
SetJumpTarget(slow_1);
SetJumpTarget(slow_2);
}
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
@ -661,7 +688,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
m_jit.js.fifoBytesSinceCheck += accessSize >> 3;
return false;
}
else if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize))
else if (m_jit.jo.fastmem_arena && m_jit.m_mmu.IsOptimizableRAMAddress(address, accessSize, inst))
{
WriteToConstRamAddress(accessSize, arg, address);
return false;

View File

@ -54,8 +54,11 @@ public:
// Jumps to the returned FixupBranch if lookup fails.
Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp, const void* bat_table);
Gen::FixupBranch CheckIfSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
Gen::FixupBranch CheckIfBATSafeAddress(const Gen::OpArg& reg_value, Gen::X64Reg reg_addr,
BitSet32 registers_in_use);
Gen::FixupBranch CheckIfAlignmentSafeAddress(Gen::X64Reg reg_addr, int access_size,
UGeckoInstruction inst);
// these return the address of the MOV, for backpatching
void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);

View File

@ -71,6 +71,16 @@ void JitArm64::EmitBackpatchRoutine(UGeckoInstruction inst, u32 flags, MemAccess
if (emit_fast_access)
{
if (emit_slow_access && jo.alignment_exceptions &&
PowerPC::AccessCausesAlignmentExceptionIfMisaligned(inst, access_size))
{
const u32 mask = PowerPC::GetAlignmentMask(access_size);
TST(addr, LogicalImm(mask, GPRSize::B32));
FixupBranch fast = B(CCFlags::CC_EQ);
slow_access_fixup = emitting_routine ? B() : BL();
SetJumpTarget(fast);
}
ARM64Reg memory_base = MEM_REG;
ARM64Reg memory_offset = addr;

View File

@ -138,7 +138,7 @@ void JitArm64::SafeLoadToReg(UGeckoInstruction inst, u32 dest, s32 addr, s32 off
if (is_immediate)
mmio_address = m_mmu.IsOptimizableMMIOAccess(imm_addr, access_size);
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size, inst))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
@ -310,7 +310,7 @@ void JitArm64::SafeStoreFromReg(UGeckoInstruction inst, s32 dest, u32 value, s32
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size, inst))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use,

View File

@ -174,7 +174,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
if (is_immediate &&
m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst))
{
EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use,
fprs_in_use);
@ -400,7 +401,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
STR(IndexType::Unsigned, ARM64Reg::X2, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags), inst))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(inst, flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,

View File

@ -76,6 +76,7 @@ const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 24> JitB
{&JitBase::m_enable_branch_following, &Config::MAIN_JIT_FOLLOW_BRANCH},
{&JitBase::m_enable_float_exceptions, &Config::MAIN_FLOAT_EXCEPTIONS},
{&JitBase::m_enable_div_by_zero_exceptions, &Config::MAIN_DIVIDE_BY_ZERO_EXCEPTIONS},
{&JitBase::m_alignment_exceptions, &Config::MAIN_ALIGNMENT_EXCEPTIONS},
{&JitBase::m_low_dcbz_hack, &Config::MAIN_LOW_DCBZ_HACK},
{&JitBase::m_fprf, &Config::MAIN_FPRF},
{&JitBase::m_accurate_nans, &Config::MAIN_ACCURATE_NANS},
@ -137,9 +138,11 @@ void JitBase::RefreshConfig()
bool any_watchpoints = m_system.GetPowerPC().GetMemChecks().HasAny();
jo.fastmem = m_fastmem_enabled && jo.fastmem_arena && (m_ppc_state.msr.DR || !any_watchpoints) &&
EMM::IsExceptionHandlerSupported();
jo.memcheck = m_system.IsMMUMode() || m_system.IsPauseOnPanicMode() || any_watchpoints;
jo.memcheck = m_system.IsMMUMode() || m_system.IsPauseOnPanicMode() || any_watchpoints ||
m_alignment_exceptions;
jo.fp_exceptions = m_enable_float_exceptions;
jo.div_by_zero_exceptions = m_enable_div_by_zero_exceptions;
jo.alignment_exceptions = m_alignment_exceptions;
}
void JitBase::InitFastmemArena()

View File

@ -83,6 +83,7 @@ protected:
bool accurateSinglePrecision;
bool fastmem;
bool fastmem_arena;
bool alignment_exceptions;
bool memcheck;
bool fp_exceptions;
bool div_by_zero_exceptions;
@ -152,6 +153,7 @@ protected:
bool m_enable_branch_following = false;
bool m_enable_float_exceptions = false;
bool m_enable_div_by_zero_exceptions = false;
bool m_alignment_exceptions = false;
bool m_low_dcbz_hack = false;
bool m_fprf = false;
bool m_accurate_nans = false;

View File

@ -25,6 +25,7 @@
#include "Core/PowerPC/MMU.h"
#include <algorithm>
#include <bit>
#include <cstddef>
#include <cstring>
@ -44,6 +45,7 @@
#include "Core/HW/ProcessorInterface.h"
#include "Core/PowerPC/GDBStub.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Interpreter/ExceptionUtils.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
@ -154,22 +156,6 @@ T MMU::ReadFromHardware(const u32 effective_address, const UGeckoInstruction ins
static_assert(flag == XCheckTLBFlag::NoException || flag == XCheckTLBFlag::Read ||
flag == XCheckTLBFlag::OpcodeNoException);
const u32 effective_start_page = effective_address & ~HW_PAGE_MASK;
const u32 effective_end_page = (effective_address + sizeof(T) - 1) & ~HW_PAGE_MASK;
if (effective_start_page != effective_end_page)
{
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
u64 var = 0;
for (u32 i = 0; i < sizeof(T); ++i)
{
var = (var << 8) | ReadFromHardware<flag, u8, never_translate>(effective_address + i, inst);
}
return static_cast<T>(var);
}
u32 physical_address;
bool wi;
@ -192,6 +178,27 @@ T MMU::ReadFromHardware(const u32 effective_address, const UGeckoInstruction ins
wi = false;
}
if (flag == XCheckTLBFlag::Read &&
AccessCausesAlignmentException(effective_address, sizeof(T) << 3, inst, wi))
{
GenerateAlignmentException(m_ppc_state, effective_address, inst);
return 0;
}
const u32 effective_start_page = effective_address & ~HW_PAGE_MASK;
const u32 effective_end_page = (effective_address + sizeof(T) - 1) & ~HW_PAGE_MASK;
if (effective_start_page != effective_end_page)
{
// This could be unaligned down to the byte level... hopefully this is rare, so doing it this
// way isn't too terrible.
u64 var = 0;
for (u32 i = 0; i < sizeof(T); ++i)
{
var = (var << 8) | ReadFromHardware<flag, u8, never_translate>(effective_address + i, inst);
}
return static_cast<T>(var);
}
if (flag == XCheckTLBFlag::Read && (physical_address & 0xF8000000) == 0x08000000)
{
if (physical_address < 0x0c000000)
@ -281,21 +288,6 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32
DEBUG_ASSERT(size <= 4);
const u32 effective_start_page = effective_address & ~HW_PAGE_MASK;
const u32 effective_end_page = (effective_address + size - 1) & ~HW_PAGE_MASK;
if (effective_start_page != effective_end_page)
{
// The write crosses a page boundary. Break it up into two writes.
// TODO: floats on non-word-aligned boundaries should technically cause alignment exceptions.
// Note that "word" means 32-bit, so paired singles or doubles might still be 32-bit aligned!
const u32 first_half_size = effective_end_page - effective_address;
const u32 second_half_size = size - first_half_size;
WriteToHardware<flag, never_translate>(effective_address, std::rotr(data, second_half_size * 8),
first_half_size, inst);
WriteToHardware<flag, never_translate>(effective_end_page, data, second_half_size, inst);
return;
}
u32 physical_address;
bool wi;
@ -318,6 +310,26 @@ void MMU::WriteToHardware(const u32 effective_address, const u32 data, const u32
wi = false;
}
if (flag == XCheckTLBFlag::Write &&
AccessCausesAlignmentException(effective_address, size << 3, inst, wi))
{
GenerateAlignmentException(m_ppc_state, effective_address, inst);
return;
}
const u32 effective_start_page = effective_address & ~HW_PAGE_MASK;
const u32 effective_end_page = (effective_address + size - 1) & ~HW_PAGE_MASK;
if (effective_start_page != effective_end_page)
{
// The write crosses a page boundary. Break it up into two writes.
const u32 first_half_size = effective_end_page - effective_address;
const u32 second_half_size = size - first_half_size;
WriteToHardware<flag, never_translate>(effective_address, std::rotr(data, second_half_size * 8),
first_half_size, inst);
WriteToHardware<flag, never_translate>(effective_end_page, data, second_half_size, inst);
return;
}
// Check for a gather pipe write (which are not implemented through the MMIO system).
//
// Note that we must mask the address to correctly emulate certain games; Pac-Man World 3
@ -941,7 +953,8 @@ std::optional<ReadResult<std::string>> MMU::HostTryReadString(const Core::CPUThr
return ReadResult<std::string>(c->translated, std::move(s));
}
bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size) const
bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size,
const UGeckoInstruction inst) const
{
if (m_power_pc.GetMemChecks().HasAny())
return false;
@ -952,6 +965,12 @@ bool MMU::IsOptimizableRAMAddress(const u32 address, const u32 access_size) cons
if (m_ppc_state.m_enable_dcache)
return false;
if ((address & GetAlignmentMask(access_size)) != 0 &&
AccessCausesAlignmentExceptionIfMisaligned(inst, access_size))
{
return false;
}
// We store whether an access can be optimized to an unchecked access
// in dbat_table.
const u32 last_byte_address = address + (access_size >> 3) - 1;
@ -1252,7 +1271,7 @@ u32 MMU::IsOptimizableMMIOAccess(u32 address, u32 access_size) const
return 0;
// Check whether the address is an aligned address of an MMIO register.
const bool aligned = (address & ((access_size >> 3) - 1)) == 0;
const bool aligned = (address & GetAlignmentMask(access_size)) == 0;
if (!aligned || !MMIO::IsMMIOAddress(address, m_system.IsWii()))
return 0;
@ -1760,4 +1779,76 @@ void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst)
{
mmu.Write_U64_Swap(var, address, inst);
}
// Reports whether inst is dcbz or dcbz_l: extended opcode 1014 under primary opcode
// 31 or 4.
static bool IsDcbz(UGeckoInstruction inst)
{
  if (inst.SUBOP10 != 1014)
    return false;

  const auto primary_opcode = inst.OPCD;
  return primary_opcode == 31 || primary_opcode == 4;
}
// Reports whether inst is a floating-point load/store for the purposes of alignment
// checking.
//
// inst:        the instruction performing the access.
// access_size: width of the access in bits.
//
// Non-indexed and indexed floating-point loads/stores always qualify. Paired-single
// loads/stores qualify only when access_size equals 32 (W/Wx set) or 64 (W/Wx clear);
// presumably this singles out unquantized float transfers - confirm against the
// quantized load/store implementation.
static bool IsFloat(UGeckoInstruction inst, size_t access_size)
{
  // Floating loadstore
  if (inst.OPCD >= 48 && inst.OPCD < 56)
    return true;

  // Paired non-indexed loadstore
  if (inst.OPCD >= 56 && inst.OPCD < 62)
    return access_size == (inst.W ? 32 : 64);

  // Paired indexed loadstore
  if (inst.OPCD == 4 && inst.SUBOP10 != 1014)
    return access_size == (inst.Wx ? 32 : 64);

  // Floating indexed loadstore. These share primary opcode 31 with the integer indexed
  // forms, so they have to be told apart by extended opcode. Without this case the
  // indexed float accesses would never be reported as alignment-sensitive, even though
  // their non-indexed counterparts above are.
  if (inst.OPCD == 31)
  {
    switch (inst.SUBOP10)
    {
    case 535:  // lfsx
    case 567:  // lfsux
    case 599:  // lfdx
    case 631:  // lfdux
    case 663:  // stfsx
    case 695:  // stfsux
    case 727:  // stfdx
    case 759:  // stfdux
    case 983:  // stfiwx
      return true;
    default:
      return false;
    }
  }

  return false;
}
// Reports whether inst is one of the load/store multiple or load/store string
// instructions.
static bool IsMultiword(UGeckoInstruction inst)
{
  switch (inst.OPCD)
  {
  case 46:  // lmw
  case 47:  // stmw
    return true;
  case 31:
    // Possibly a string instruction; decided by the extended opcode below.
    break;
  default:
    return false;
  }

  switch (inst.SUBOP10)
  {
  case 533:  // lswx
  case 597:  // lswi
  case 661:  // stswx
  case 725:  // stswi
    return true;
  default:
    return false;
  }
}
// Reports whether inst is lwarx (31/20) or stwcx. (31/150).
static bool IsLwarxOrStwcx(UGeckoInstruction inst)
{
  if (inst.OPCD != 31)
    return false;

  const auto extended_opcode = inst.SUBOP10;
  return extended_opcode == 20 || extended_opcode == 150;
}
// Reports whether inst is one of the external control instructions, eciwx or ecowx.
static bool IsEciwxOrEcowx(UGeckoInstruction inst)
{
  const bool is_eciwx = inst.OPCD == 31 && inst.SUBOP10 == 310;
  const bool is_ecowx = inst.OPCD == 31 && inst.SUBOP10 == 438;
  return is_eciwx || is_ecowx;
}
// Reports whether inst raises an alignment exception whenever the access's `wi` flag is
// set, regardless of the address. Only dcbz and dcbz_l are classified this way.
// NOTE(review): `wi` presumably stands for the write-through / caching-inhibited memory
// attributes - confirm against the address translation code that produces it.
bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst)
{
  const bool is_cache_block_zero = IsDcbz(inst);
  return is_cache_block_zero;
}
// Reports whether inst belongs to a class of instructions that raises an alignment
// exception when its address is misaligned. The address itself is not examined here.
//
// inst:        the instruction performing the access.
// access_size: width of the access in bits; only used for the floating-point check.
bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size)
{
  // The checks run in the same order as a short-circuiting || chain would.
  if (IsFloat(inst, access_size))
    return true;
  if (IsMultiword(inst))
    return true;
  if (IsLwarxOrStwcx(inst))
    return true;
  return IsEciwxOrEcowx(inst);
}
// Decides whether the described access raises an alignment exception.
//
// effective_address: address being accessed.
// access_size:       width of the access in bits.
// inst:              the instruction performing the access.
// wi:                flag supplied by the caller's address translation.
//
// Returns true when wi is set and the instruction faults unconditionally in that case,
// or when the address has bits set under the alignment mask and the instruction is
// alignment-sensitive.
bool AccessCausesAlignmentException(u32 effective_address, size_t access_size,
                                    UGeckoInstruction inst, bool wi)
{
  const bool faults_regardless_of_address = wi && AccessCausesAlignmentExceptionIfWi(inst);
  if (faults_regardless_of_address)
    return true;

  const bool is_misaligned = (effective_address & GetAlignmentMask(access_size)) != 0;
  return is_misaligned && AccessCausesAlignmentExceptionIfMisaligned(inst, access_size);
}
} // namespace PowerPC

View File

@ -248,7 +248,7 @@ public:
// Result changes based on the BAT registers and MSR.DR. Returns whether
// it's safe to optimize a read or write to this address to an unguarded
// memory access. Does not consider page tables.
bool IsOptimizableRAMAddress(u32 address, u32 access_size) const;
bool IsOptimizableRAMAddress(u32 address, u32 access_size, UGeckoInstruction inst) const;
u32 IsOptimizableMMIOAccess(u32 address, u32 access_size) const;
bool IsOptimizableGatherPipeWrite(u32 address) const;
@ -345,4 +345,15 @@ void WriteU64FromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst);
void WriteU16SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
void WriteU32SwapFromJit(MMU& mmu, u32 var, u32 address, UGeckoInstruction inst);
void WriteU64SwapFromJit(MMU& mmu, u64 var, u32 address, UGeckoInstruction inst);
// Returns the bit mask used to test an address for alignment.
//
// size: width of the access in bits.
//
// The mask is the access width in bytes minus one, except that the width is capped at
// four bytes, so accesses wider than a word are only checked for word alignment.
constexpr u32 GetAlignmentMask(size_t size)
{
  const size_t size_in_bytes = size >> 3;
  const size_t alignment = size_in_bytes < 4 ? size_in_bytes : 4;
  return static_cast<u32>(alignment - 1);
}
bool AccessCausesAlignmentExceptionIfWi(UGeckoInstruction inst);
bool AccessCausesAlignmentExceptionIfMisaligned(UGeckoInstruction inst, size_t access_size);
bool AccessCausesAlignmentException(u32 effective_address, size_t access_size,
UGeckoInstruction inst, bool wi);
} // namespace PowerPC

View File

@ -553,8 +553,7 @@ void PowerPCManager::CheckExceptions()
m_ppc_state.msr.LE = m_ppc_state.msr.ILE;
m_ppc_state.msr.Hex &= ~0x04EF36;
m_ppc_state.pc = m_ppc_state.npc = 0x00000600;
// TODO crazy amount of DSISR options to check out
// DSISR and DAR regs are changed in GenerateAlignmentException()
DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT");
m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT;