Arm64Emitter: Add MOVPage2R utility function
This new function is like MOVP2R, except it masks out the lower 12 bits, returning them instead of writing them to the register. These lower 12 bits can then be used as an offset for LDR/STR. This lets us turn ADRP+ADD+LDR sequences with a zero offset into ADRP+LDR sequences with a non-zero offset, saving one instruction.
parent 22bcf134bf
commit d64c3dc267
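
To illustrate the effect at a call site, here is a minimal sketch (the `some_global` variable is hypothetical, and the emitted AArch64 instructions in the comments assume the pointer is close enough for MOVI2R to use ADRP, as described above):

  // Before: MOVP2R materializes the full pointer, so the load uses offset 0.
  // Emitted:  ADRP x0, page(some_global) ; ADD x0, x0, pageoff(some_global) ; LDR w1, [x0]
  MOVP2R(ARM64Reg::X0, &some_global);
  LDR(IndexType::Unsigned, ARM64Reg::W1, ARM64Reg::X0, 0);

  // After: MOVPage2R writes only the 4 KiB page address to the register and returns
  // the low 12 bits, which become the load's immediate offset.
  // Emitted:  ADRP x0, page(some_global) ; LDR w1, [x0, #pageoff(some_global)]
  const s32 offset = MOVPage2R(ARM64Reg::X0, &some_global);
  LDR(IndexType::Unsigned, ARM64Reg::W1, ARM64Reg::X0, offset);
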
@@ -1009,12 +1009,20 @@ public:
   void MOVP2R(ARM64Reg Rd, P* ptr)
   {
     ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
-    MOVI2R(Rd, (uintptr_t)ptr);
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
+  }
+  template <class P>
+  // Given an address, stores the page address into a register and returns the page-relative offset
+  s32 MOVPage2R(ARM64Reg Rd, P* ptr)
+  {
+    ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
+    return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
   }
 
-  // Wrapper around AND x, y, imm etc.
-  // If you are sure the imm will work, preferably construct a LogicalImm directly instead,
-  // since that is constexpr and thus can be done at compile-time for constant values.
+  // Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
+  // without a scratch register, preferably construct a LogicalImm directly instead,
+  // since that is constexpr and thus can be done at compile time for constant values.
   void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)

@@ -1024,6 +1032,7 @@ public:
   void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
 
+  // Wrappers around arithmetic operations with an immediate.
   void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
                        ARM64Reg scratch);
   void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);

@@ -915,8 +915,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     SetJumpTarget(exception);
     LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
     TBZ(ARM64Reg::W30, 15, done_here); // MSR.EE
-    MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
-    LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
+    LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30,
+        MOVPage2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause));
     constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                ProcessorInterface::INT_CAUSE_PE_FINISH;

@@ -951,8 +951,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     SetJumpTarget(exception);
     LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
     TBZ(WA, 15, done_here); // MSR.EE
-    MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
-    LDR(IndexType::Unsigned, WA, XA, 0);
+    LDR(IndexType::Unsigned, WA, XA, MOVPage2R(XA, &ProcessorInterface::m_InterruptCause));
     constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                ProcessorInterface::INT_CAUSE_PE_FINISH;

@@ -42,10 +42,9 @@ void JitArm64::GenerateAsm()
 
   // Swap the stack pointer, so we have proper guard pages.
   ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
-  MOVP2R(ARM64Reg::X1, &m_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer));
   FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
   SetJumpTarget(no_fake_stack);

@@ -167,8 +166,7 @@ void JitArm64::GenerateAsm()
 
   // Check the state pointer to see if we are exiting
  // Gets checked on at the end of every slice
-  MOVP2R(ARM64Reg::X0, CPU::GetStatePtr());
-  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
 
   CMP(ARM64Reg::W0, 0);
   FixupBranch Exit = B(CC_NEQ);

@@ -186,8 +184,8 @@ void JitArm64::GenerateAsm()
   SetJumpTarget(Exit);
 
   // Reset the stack pointer, as the BLR optimization have touched it.
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
 
   m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);

@@ -526,9 +524,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -544,9 +542,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -561,9 +559,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -578,9 +576,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -607,9 +605,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -625,9 +623,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -642,9 +640,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -659,9 +657,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }

@@ -727,9 +725,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -746,9 +744,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -765,9 +763,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -783,9 +781,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -812,9 +810,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -831,9 +829,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -850,9 +848,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -868,9 +866,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
 
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);

@@ -34,18 +34,18 @@ public:
   }
 
 private:
-  void StoreFromRegister(int sbits, ARM64Reg reg)
+  void StoreFromRegister(int sbits, ARM64Reg reg, s32 offset)
   {
     switch (sbits)
     {
     case 8:
-      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 16:
-      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits);

@@ -55,20 +55,20 @@ private:
 
   void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
     // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
     {
-      StoreFromRegister(sbits, m_src_reg);
+      StoreFromRegister(sbits, m_src_reg, offset);
     }
     else
     {
       m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
-      StoreFromRegister(sbits, ARM64Reg::W1);
+      StoreFromRegister(sbits, ARM64Reg::W1, offset);
     }
   }
 

@@ -123,24 +123,24 @@ private:
       m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
   }
 
-  void LoadToRegister(int sbits, bool dont_extend)
+  void LoadToRegister(int sbits, bool dont_extend, s32 offset)
   {
     switch (sbits)
     {
     case 8:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 16:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits);

@@ -150,19 +150,19 @@ private:
 
   void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
     // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
    {
-      LoadToRegister(sbits, false);
+      LoadToRegister(sbits, false, offset);
     }
     else
     {
-      LoadToRegister(sbits, true);
+      LoadToRegister(sbits, true, offset);
       m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
       if (m_sign_extend)
         m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);