Arm64Emitter: Add MOVPage2R utility function

This new function is like MOVP2R, except that it masks the lower
12 bits out of the address written to the register and returns them
instead. These lower 12 bits can then be used as an offset for
LDR/STR. This lets us turn ADRP+ADD+LDR sequences where the LDR has a
zero offset into ADRP+LDR sequences where the LDR has a non-zero
offset, saving one instruction.
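
As a rough illustration of the call-site change (the pointer ptr, the
register choices, and the surrounding code are hypothetical, not taken
from the diffs below):

// Before: materialize the full pointer, then load with a zero offset.
// For a target within ADRP range, MOVP2R emits ADRP + ADD.
MOVP2R(ARM64Reg::X0, ptr);                                     // ADRP x0, page; ADD x0, x0, #lo12
LDR(IndexType::Unsigned, ARM64Reg::W1, ARM64Reg::X0, 0);       // LDR  w1, [x0]

// After: materialize only the 4 KiB page base and fold the low 12 bits
// of the address into the load's immediate offset.
const s32 offset = MOVPage2R(ARM64Reg::X0, ptr);               // ADRP x0, page
LDR(IndexType::Unsigned, ARM64Reg::W1, ARM64Reg::X0, offset);  // LDR  w1, [x0, #lo12]

The unsigned-offset forms of LDR/STR encode a 12-bit immediate scaled
by the access size, so the returned offset encodes directly as long as
the target is naturally aligned, which should be the case for the
variables touched in this commit.
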
JosJuice 2022-11-21 22:39:15 +01:00
parent 22bcf134bf
commit d64c3dc267
4 changed files with 72 additions and 66 deletions


@ -1009,12 +1009,20 @@ public:
void MOVP2R(ARM64Reg Rd, P* ptr)
{
ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, (uintptr_t)ptr);
MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
}
template <class P>
// Given an address, stores the page address into a register and returns the page-relative offset
s32 MOVPage2R(ARM64Reg Rd, P* ptr)
{
ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
}
// Wrapper around AND x, y, imm etc.
// If you are sure the imm will work, preferably construct a LogicalImm directly instead,
// since that is constexpr and thus can be done at compile-time for constant values.
// Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
// without a scratch register, preferably construct a LogicalImm directly instead,
// since that is constexpr and thus can be done at compile time for constant values.
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
@ -1024,6 +1032,7 @@ public:
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
// Wrappers around arithmetic operations with an immediate.
void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
ARM64Reg scratch);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);


@ -915,8 +915,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SetJumpTarget(exception);
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE
MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30,
MOVPage2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause));
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;
@ -951,8 +951,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
SetJumpTarget(exception);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 15, done_here); // MSR.EE
MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, WA, XA, 0);
LDR(IndexType::Unsigned, WA, XA, MOVPage2R(XA, &ProcessorInterface::m_InterruptCause));
constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
ProcessorInterface::INT_CAUSE_PE_TOKEN |
ProcessorInterface::INT_CAUSE_PE_FINISH;


@ -42,10 +42,9 @@ void JitArm64::GenerateAsm()
// Swap the stack pointer, so we have proper guard pages.
ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
MOVP2R(ARM64Reg::X1, &m_stack_pointer);
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer));
FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
SetJumpTarget(no_fake_stack);
@ -167,8 +166,7 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice
MOVP2R(ARM64Reg::X0, CPU::GetStatePtr());
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
CMP(ARM64Reg::W0, 0);
FixupBranch Exit = B(CC_NEQ);
@ -186,8 +184,8 @@ void JitArm64::GenerateAsm()
SetJumpTarget(Exit);
// Reset the stack pointer, as the BLR optimization have touched it.
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
@ -526,9 +524,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -544,9 +542,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -561,9 +559,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -578,9 +576,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -607,9 +605,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -625,9 +623,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -642,9 +640,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -659,9 +657,9 @@ void JitArm64::GenerateQuantizedLoads()
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
@ -727,9 +725,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU8 = GetCodePtr();
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -746,9 +744,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS8 = GetCodePtr();
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -765,9 +763,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedU16 = GetCodePtr();
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -783,9 +781,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -812,9 +810,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -831,9 +829,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS8 = GetCodePtr();
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
@ -850,9 +848,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
@ -868,9 +866,9 @@ void JitArm64::GenerateQuantizedStores()
}
const u8* storeSingleS16 = GetCodePtr();
{
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);


@ -34,18 +34,18 @@ public:
}
private:
void StoreFromRegister(int sbits, ARM64Reg reg)
void StoreFromRegister(int sbits, ARM64Reg reg, s32 offset)
{
switch (sbits)
{
case 8:
m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break;
case 16:
m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break;
case 32:
m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits);
@ -55,20 +55,20 @@ private:
void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVP2R(ARM64Reg::X0, ptr);
const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,
// then mask, then sign extend if needed (1 instr vs. ~4).
u32 all_ones = (1ULL << sbits) - 1;
const u32 all_ones = (1ULL << sbits) - 1;
if ((all_ones & mask) == all_ones)
{
StoreFromRegister(sbits, m_src_reg);
StoreFromRegister(sbits, m_src_reg, offset);
}
else
{
m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
StoreFromRegister(sbits, ARM64Reg::W1);
StoreFromRegister(sbits, ARM64Reg::W1, offset);
}
}
@ -123,24 +123,24 @@ private:
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
}
void LoadToRegister(int sbits, bool dont_extend)
void LoadToRegister(int sbits, bool dont_extend, s32 offset)
{
switch (sbits)
{
case 8:
if (m_sign_extend && !dont_extend)
m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
else
m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break;
case 16:
if (m_sign_extend && !dont_extend)
m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
else
m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break;
case 32:
m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits);
@ -150,19 +150,19 @@ private:
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVP2R(ARM64Reg::X0, ptr);
const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,
// then mask, then sign extend if needed (1 instr vs. ~4).
u32 all_ones = (1ULL << sbits) - 1;
const u32 all_ones = (1ULL << sbits) - 1;
if ((all_ones & mask) == all_ones)
{
LoadToRegister(sbits, false);
LoadToRegister(sbits, false, offset);
}
else
{
LoadToRegister(sbits, true);
LoadToRegister(sbits, true, offset);
m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
if (m_sign_extend)
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);