Merge pull request #9494 from Dentomologist/convert_arm64reg_to_enum_class

Arm64Gen: Convert ARM64Reg to enum class
JosJuice 2021-03-17 00:05:23 +01:00 committed by GitHub
commit a45a0a2066
18 changed files with 859 additions and 962 deletions

File diff suppressed because it is too large.

@ -23,7 +23,7 @@ namespace Arm64Gen
// 010 - VFP single precision
// 100 - VFP double precision
// 110 - VFP quad precision
enum ARM64Reg
enum class ARM64Reg
{
// 32bit registers
W0 = 0,
@ -224,9 +224,21 @@ enum ARM64Reg
WZR = WSP,
ZR = SP,
INVALID_REG = 0xFFFFFFFF
INVALID_REG = -1,
};
constexpr int operator&(const ARM64Reg& reg, const int mask)
{
return static_cast<int>(reg) & mask;
}
constexpr int operator|(const ARM64Reg& reg, const int mask)
{
return static_cast<int>(reg) | mask;
}
constexpr ARM64Reg operator+(const ARM64Reg& reg, const int addend)
{
return static_cast<ARM64Reg>(static_cast<int>(reg) + addend);
}
constexpr bool Is64Bit(ARM64Reg reg)
{
return (reg & 0x20) != 0;
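
A scoped enum no longer converts implicitly to an integer, which is what forces the explicit &, | and + operators above: bit tests like the one in Is64Bit would otherwise need a static_cast at every call site. A minimal standalone sketch of the pattern (register values abbreviated; the real enum has many more members):

enum class Reg
{
  W0 = 0,
  X0 = 0x20,  // bit 5 marks a 64-bit GPR
  INVALID_REG = -1,
};

constexpr int operator&(Reg reg, int mask)
{
  return static_cast<int>(reg) & mask;
}

constexpr bool Is64Bit(Reg reg)
{
  return (reg & 0x20) != 0;
}

static_assert(!Is64Bit(Reg::W0));
static_assert(Is64Bit(Reg::X0));
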
@ -256,9 +268,13 @@ constexpr bool IsGPR(ARM64Reg reg)
return static_cast<int>(reg) < 0x40;
}
constexpr ARM64Reg DecodeReg(ARM64Reg reg)
constexpr int DecodeReg(ARM64Reg reg)
{
return static_cast<ARM64Reg>(reg & 0x1F);
return reg & 0x1F;
}
constexpr ARM64Reg EncodeRegTo32(ARM64Reg reg)
{
return static_cast<ARM64Reg>(DecodeReg(reg));
}
constexpr ARM64Reg EncodeRegTo64(ARM64Reg reg)
{
@ -266,7 +282,7 @@ constexpr ARM64Reg EncodeRegTo64(ARM64Reg reg)
}
constexpr ARM64Reg EncodeRegToSingle(ARM64Reg reg)
{
return static_cast<ARM64Reg>(DecodeReg(reg) + S0);
return static_cast<ARM64Reg>(ARM64Reg::S0 | DecodeReg(reg));
}
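
DecodeReg now returns a plain int rather than an ARM64Reg: the low five bits are the register index, which callers below use directly (for instance as a BitSet32 bit position), while the new EncodeRegTo32 serves the call sites that really wanted a typed 32-bit register back. A standalone sketch of the round trip, with encoding values inferred from the type-bit comment at the top of this file and from IsGPR() (anything below 0x40 is a GPR):

enum class Reg
{
  W0 = 0,     // type 000: 32-bit GPR
  X0 = 0x20,  // type 001: 64-bit GPR
  S0 = 0x40,  // type 010: VFP single
  D0 = 0x80,  // type 100: VFP double
  Q0 = 0xC0,  // type 110: VFP quad
};

constexpr int operator&(Reg reg, int mask) { return static_cast<int>(reg) & mask; }
constexpr int operator|(Reg reg, int mask) { return static_cast<int>(reg) | mask; }

constexpr int DecodeReg(Reg reg) { return reg & 0x1F; }  // an index, not a register
constexpr Reg EncodeRegTo32(Reg reg) { return static_cast<Reg>(DecodeReg(reg)); }
constexpr Reg EncodeRegToSingle(Reg reg) { return static_cast<Reg>(Reg::S0 | DecodeReg(reg)); }

static_assert(DecodeReg(Reg::X0) == 0);
static_assert(EncodeRegTo32(Reg::X0) == Reg::W0);
static_assert(EncodeRegToSingle(Reg::Q0) == Reg::S0);
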
constexpr ARM64Reg EncodeRegToDouble(ARM64Reg reg)
{
@ -578,7 +594,7 @@ public:
// Unconditional Branch (register)
void BR(ARM64Reg Rn);
void BLR(ARM64Reg Rn);
void RET(ARM64Reg Rn = X30);
void RET(ARM64Reg Rn = ARM64Reg::X30);
void ERET();
void DRPS();
@ -648,15 +664,15 @@ public:
// Aliases
void CSET(ARM64Reg Rd, CCFlags cond)
{
ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
ARM64Reg zr = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
}
void CSETM(ARM64Reg Rd, CCFlags cond)
{
ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
ARM64Reg zr = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
}
void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs); }
void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
// Data-Processing 1 source
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
void REV16(ARM64Reg Rd, ARM64Reg Rn);
@ -704,10 +720,10 @@ public:
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void TST(ARM64Reg Rn, ARM64Reg Rm) { ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm); }
void TST(ARM64Reg Rn, ARM64Reg Rm) { ANDS(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm); }
void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Shift);
ANDS(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm, Shift);
}
// Wrap the above for saner syntax
@ -879,22 +895,22 @@ public:
// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch
// register.
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG)
void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG)
{
ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch);
ANDSI2R(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm, scratch);
}
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
ARM64Reg scratch);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
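
With the scoped enum, default arguments must spell out the enclosing type, hence the ARM64Reg::INVALID_REG churn through these declarations. Per the comment above, the scratch register only matters when the immediate has no direct encoding; a hedged sketch of the calling pattern (the real emitter asserts when a scratch register is needed but none was supplied):

void Example(Arm64Gen::ARM64XEmitter& emit)
{
  using Arm64Gen::ARM64Reg;
  // 0xFF00 is a valid logical immediate, so no scratch register is needed:
  emit.ANDI2R(ARM64Reg::W0, ARM64Reg::W1, 0xFF00);
  // 0x12345678 is not, so W2 is handed over for a MOVI2R fallback:
  emit.ANDI2R(ARM64Reg::W0, ARM64Reg::W1, 0x12345678, ARM64Reg::W2);
}
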
@ -925,9 +941,9 @@ public:
ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
{
auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
MOVP2R(X8, trampoline);
MOVP2R(X0, const_cast<void*>((const void*)f));
return X8;
MOVP2R(ARM64Reg::X8, trampoline);
MOVP2R(ARM64Reg::X0, const_cast<void*>((const void*)f));
return ARM64Reg::X8;
}
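
The trampoline trick here: a capturing lambda cannot decay to a function pointer, so the emitter stashes a pointer to the std::function object in X0 (the first AAPCS64 argument register) and hands back X8 holding the address of a plain function that restores the C++ call glue. CallLambdaTrampoline is a static member defined elsewhere in the emitter; it plausibly amounts to this sketch (shown as a free function for brevity):

#include <functional>

template <typename T, typename... Args>
static T CallLambdaTrampoline(const std::function<T(Args...)>* f, Args... args)
{
  return (*f)(args...);
}

The caller then only has to place the remaining arguments in X1 onward and BLR the returned register.
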
// Plain function call
@ -962,9 +978,9 @@ public:
// Loadstore multiple structure
void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = ARM64Reg::SP);
void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = ARM64Reg::SP);
// Loadstore paired
void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
@ -1109,12 +1125,13 @@ public:
void MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift = 0);
void BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = ARM64Reg::INVALID_REG,
bool negate = false);
void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = ARM64Reg::INVALID_REG);
// ABI related
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = ARM64Reg::INVALID_REG);
void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = ARM64Reg::INVALID_REG);
private:
ARM64XEmitter* m_emit;


@ -157,9 +157,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
}
Interpreter::Instruction instr = PPCTables::GetInterpreterOp(inst);
MOVP2R(X8, instr);
MOVI2R(W0, inst.hex);
BLR(X8);
MOVP2R(ARM64Reg::X8, instr);
MOVI2R(ARM64Reg::W0, inst.hex);
BLR(ARM64Reg::X8);
if (js.op->opinfo->flags & FL_ENDBLOCK)
{
@ -213,10 +213,10 @@ void JitArm64::HLEFunction(u32 hook_index)
gpr.Flush(FlushMode::All);
fpr.Flush(FlushMode::All);
MOVP2R(X8, &HLE::Execute);
MOVI2R(W0, js.compilerPC);
MOVI2R(W1, hook_index);
BLR(X8);
MOVP2R(ARM64Reg::X8, &HLE::Execute);
MOVI2R(ARM64Reg::W0, js.compilerPC);
MOVI2R(ARM64Reg::W1, hook_index);
BLR(ARM64Reg::X8);
}
void JitArm64::DoNothing(UGeckoInstruction inst)
@ -236,31 +236,31 @@ void JitArm64::Cleanup()
{
static_assert(PPCSTATE_OFF(gather_pipe_ptr) <= 504);
static_assert(PPCSTATE_OFF(gather_pipe_ptr) + 8 == PPCSTATE_OFF(gather_pipe_base_ptr));
LDP(IndexType::Signed, X0, X1, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
SUB(X0, X0, X1);
CMP(X0, GPFifo::GATHER_PIPE_SIZE);
LDP(IndexType::Signed, ARM64Reg::X0, ARM64Reg::X1, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
SUB(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1);
CMP(ARM64Reg::X0, GPFifo::GATHER_PIPE_SIZE);
FixupBranch exit = B(CC_LT);
MOVP2R(X0, &GPFifo::UpdateGatherPipe);
BLR(X0);
MOVP2R(ARM64Reg::X0, &GPFifo::UpdateGatherPipe);
BLR(ARM64Reg::X0);
SetJumpTarget(exit);
}
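
The two static_asserts above exist because LDP with a signed scaled offset reaches at most 504 bytes (63 * 8) and loads adjacent memory, so gather_pipe_ptr and gather_pipe_base_ptr must sit next to each other near the start of the struct. What the emitted sequence computes, as a standalone sketch (GATHER_PIPE_SIZE is passed in, since its value is defined elsewhere):

#include <cstddef>
#include <cstdint>

struct PpcStateSketch  // stand-in for the two fields LDP loads together
{
  uint8_t* gather_pipe_ptr;
  uint8_t* gather_pipe_base_ptr;
};

void CleanupSketch(PpcStateSketch& ppc, std::ptrdiff_t gather_pipe_size,
                   void (*update_gather_pipe)())
{
  // LDP -> SUB -> CMP -> B(CC_LT): flush only once the pipe has filled up.
  if (ppc.gather_pipe_ptr - ppc.gather_pipe_base_ptr >= gather_pipe_size)
    update_gather_pipe();  // MOVP2R + BLR in the emitted version
}
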
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0.Hex || MMCR1.Hex)
{
MOVP2R(X8, &PowerPC::UpdatePerformanceMonitor);
MOVI2R(X0, js.downcountAmount);
MOVI2R(X1, js.numLoadStoreInst);
MOVI2R(X2, js.numFloatingPointInst);
BLR(X8);
MOVP2R(ARM64Reg::X8, &PowerPC::UpdatePerformanceMonitor);
MOVI2R(ARM64Reg::X0, js.downcountAmount);
MOVI2R(ARM64Reg::X1, js.numLoadStoreInst);
MOVI2R(ARM64Reg::X2, js.numFloatingPointInst);
BLR(ARM64Reg::X8);
}
}
void JitArm64::DoDownCount()
{
LDR(IndexType::Unsigned, W0, PPC_REG, PPCSTATE_OFF(downcount));
SUBSI2R(W0, W0, js.downcountAmount, W1);
STR(IndexType::Unsigned, W0, PPC_REG, PPCSTATE_OFF(downcount));
LDR(IndexType::Unsigned, ARM64Reg::W0, PPC_REG, PPCSTATE_OFF(downcount));
SUBSI2R(ARM64Reg::W0, ARM64Reg::W0, js.downcountAmount, ARM64Reg::W1);
STR(IndexType::Unsigned, ARM64Reg::W0, PPC_REG, PPCSTATE_OFF(downcount));
}
void JitArm64::ResetStack()
@ -268,8 +268,8 @@ void JitArm64::ResetStack()
if (!m_enable_blr_optimization)
return;
LDR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
ADD(SP, X0, 0);
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
}
void JitArm64::AllocStack()
@ -317,9 +317,9 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
if (LK)
{
// Push {ARM_PC+20; PPC_PC} on the stack
MOVI2R(X1, exit_address_after_return);
ADR(X0, 20);
STP(IndexType::Pre, X0, X1, SP, -16);
MOVI2R(ARM64Reg::X1, exit_address_after_return);
ADR(ARM64Reg::X0, 20);
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
}
JitBlock* b = js.curBlock;
@ -363,9 +363,9 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
else
{
// Push {ARM_PC, PPC_PC} on the stack
MOVI2R(X1, exit_address_after_return);
ADR(X0, 12);
STP(IndexType::Pre, X0, X1, SP, -16);
MOVI2R(ARM64Reg::X1, exit_address_after_return);
ADR(ARM64Reg::X0, 12);
STP(IndexType::Pre, ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::SP, -16);
BL(dispatcher);
@ -393,7 +393,7 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return)
ARM64Reg code_reg = gpr.GetReg();
MOVI2R(after_reg, exit_address_after_return);
ADR(EncodeRegTo64(code_reg), 12);
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), SP, -16);
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
gpr.Unlock(after_reg, code_reg);
FixupBranch skip_exit = BL();
@ -427,13 +427,13 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
EndTimeProfile(js.curBlock);
// Check if {ARM_PC, PPC_PC} matches the current state.
LDP(IndexType::Post, X2, X1, SP, 16);
CMP(W1, DISPATCHER_PC);
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
CMP(ARM64Reg::W1, DISPATCHER_PC);
FixupBranch no_match = B(CC_NEQ);
DoDownCount(); // overwrites X0 + X1
RET(X2);
RET(ARM64Reg::X2);
SetJumpTarget(no_match);
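
Taken together, WriteExit (push side) and WriteBLRExit (pop side) implement the BLR optimization: a shadow stack of {host return address, guest return PC} pairs kept on the native SP. ADR(X0, 20) materializes the host-side return address as 20 bytes past the ADR itself, i.e. the instruction just after the exit sequence. A toy model of the handshake (semantics pieced together from the comments in these two functions, not real Dolphin code):

#include <cstdint>

struct ShadowEntry
{
  const void* host_return;  // pushed via ADR: address just past the exit
  uint32_t guest_return;    // exit_address_after_return
};

// WriteBLRExit pops a pair and takes the fast native return only while the
// guest PC still matches; otherwise it falls back to the dispatcher.
const void* ResolveReturn(const ShadowEntry& popped, uint32_t dispatcher_pc,
                          const void* dispatcher)
{
  return popped.guest_return == dispatcher_pc ? popped.host_return : dispatcher;
}
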
@ -448,19 +448,19 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
{
Cleanup();
LDR(IndexType::Unsigned, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(Exceptions));
MOVI2R(DISPATCHER_PC, destination);
FixupBranch no_exceptions = CBZ(W30);
FixupBranch no_exceptions = CBZ(ARM64Reg::W30);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
if (only_external)
MOVP2R(X8, &PowerPC::CheckExternalExceptions);
MOVP2R(ARM64Reg::X8, &PowerPC::CheckExternalExceptions);
else
MOVP2R(X8, &PowerPC::CheckExceptions);
BLR(X8);
MOVP2R(ARM64Reg::X8, &PowerPC::CheckExceptions);
BLR(ARM64Reg::X8);
LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
@ -479,8 +479,8 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
Cleanup();
LDR(IndexType::Unsigned, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_exceptions = CBZ(W30);
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_exceptions = CBZ(ARM64Reg::W30);
static_assert(PPCSTATE_OFF(pc) <= 252);
static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
@ -527,15 +527,16 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
void JitArm64::BeginTimeProfile(JitBlock* b)
{
MOVP2R(X0, &b->profile_data);
LDR(IndexType::Unsigned, X1, X0, offsetof(JitBlock::ProfileData, runCount));
ADD(X1, X1, 1);
MOVP2R(ARM64Reg::X0, &b->profile_data);
LDR(IndexType::Unsigned, ARM64Reg::X1, ARM64Reg::X0, offsetof(JitBlock::ProfileData, runCount));
ADD(ARM64Reg::X1, ARM64Reg::X1, 1);
// Fetch the current counter register
CNTVCT(X2);
CNTVCT(ARM64Reg::X2);
// stores runCount and ticStart
STP(IndexType::Signed, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
STP(IndexType::Signed, ARM64Reg::X1, ARM64Reg::X2, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, runCount));
}
void JitArm64::EndTimeProfile(JitBlock* b)
@ -544,20 +545,22 @@ void JitArm64::EndTimeProfile(JitBlock* b)
return;
// Fetch the current counter register
CNTVCT(X1);
CNTVCT(ARM64Reg::X1);
MOVP2R(X0, &b->profile_data);
MOVP2R(ARM64Reg::X0, &b->profile_data);
LDR(IndexType::Unsigned, X2, X0, offsetof(JitBlock::ProfileData, ticStart));
SUB(X1, X1, X2);
LDR(IndexType::Unsigned, ARM64Reg::X2, ARM64Reg::X0, offsetof(JitBlock::ProfileData, ticStart));
SUB(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X2);
// loads ticCounter and downcountCounter
LDP(IndexType::Signed, X2, X3, X0, offsetof(JitBlock::ProfileData, ticCounter));
ADD(X2, X2, X1);
ADDI2R(X3, X3, js.downcountAmount, X1);
LDP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, ticCounter));
ADD(ARM64Reg::X2, ARM64Reg::X2, ARM64Reg::X1);
ADDI2R(ARM64Reg::X3, ARM64Reg::X3, js.downcountAmount, ARM64Reg::X1);
// stores ticCounter and downcountCounter
STP(IndexType::Signed, X2, X3, X0, offsetof(JitBlock::ProfileData, ticCounter));
STP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, ticCounter));
}
void JitArm64::Run()
@ -666,16 +669,16 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
int gqr = *code_block.m_gqr_used.begin();
if (!code_block.m_gqr_modified[gqr] && !GQR(gqr))
{
LDR(IndexType::Unsigned, W0, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + gqr));
FixupBranch no_fail = CBZ(W0);
LDR(IndexType::Unsigned, ARM64Reg::W0, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + gqr));
FixupBranch no_fail = CBZ(ARM64Reg::W0);
FixupBranch fail = B();
SwitchToFarCode();
SetJumpTarget(fail);
MOVI2R(DISPATCHER_PC, js.blockStart);
STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(W0, static_cast<u32>(JitInterface::ExceptionType::PairedQuantize));
MOVP2R(X1, &JitInterface::CompileExceptionCheck);
BLR(X1);
MOVI2R(ARM64Reg::W0, static_cast<u32>(JitInterface::ExceptionType::PairedQuantize));
MOVP2R(ARM64Reg::X1, &JitInterface::CompileExceptionCheck);
BLR(ARM64Reg::X1);
B(dispatcher_no_check);
SwitchToNearCode();
SetJumpTarget(no_fail);
@ -719,10 +722,10 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.fifoBytesSinceCheck = 0;
js.mustCheckFifo = false;
gpr.Lock(W30);
gpr.Lock(ARM64Reg::W30);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W30] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
FixupBranch Exception = B();
SwitchToFarCode();
@ -730,20 +733,20 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
FixupBranch exit = B();
SetJumpTarget(Exception);
ABI_PushRegisters(regs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
MOVP2R(X8, &GPFifo::FastCheckGatherPipe);
BLR(X8);
m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
MOVP2R(ARM64Reg::X8, &GPFifo::FastCheckGatherPipe);
BLR(ARM64Reg::X8);
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
ABI_PopRegisters(regs_in_use);
// Inline exception check
LDR(IndexType::Unsigned, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
LDR(IndexType::Unsigned, W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(W30, 11, done_here);
MOVP2R(X30, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, W30, X30, 0);
TST(W30, 23, 2);
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(Exceptions));
TBZ(ARM64Reg::W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(ARM64Reg::W30, 11, done_here);
MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
TST(ARM64Reg::W30, 23, 2);
B(CC_EQ, done_here);
gpr.Flush(FlushMode::MaintainState);
@ -751,7 +754,7 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
WriteExceptionExit(js.compilerPC, true);
SwitchToNearCode();
SetJumpTarget(exit);
gpr.Unlock(W30);
gpr.Unlock(ARM64Reg::W30);
// So we don't check exceptions twice
gatherPipeIntCheck = false;


@ -62,30 +62,30 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.FCVT(32, 64, D0, RS);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, D0, MEM_REG, addr);
m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.REV32(8, D0, RS);
m_float_emit.STR(32, D0, MEM_REG, addr);
m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
{
m_float_emit.FCVTN(32, D0, RS);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(64, Q0, MEM_REG, addr);
m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.REV32(8, D0, RS);
m_float_emit.STR(64, Q0, MEM_REG, addr);
m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
}
else
{
m_float_emit.REV64(8, Q0, RS);
m_float_emit.STR(64, Q0, MEM_REG, addr);
m_float_emit.REV64(8, ARM64Reg::Q0, RS);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT)
@ -103,7 +103,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg temp = W0;
ARM64Reg temp = ARM64Reg::W0;
if (flags & BackPatchInfo::FLAG_SIZE_32)
REV32(temp, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
@ -120,8 +120,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
// This literally only stores 32bytes of zeros to the target address
ADD(addr, addr, MEM_REG);
STP(IndexType::Signed, ZR, ZR, addr, 0);
STP(IndexType::Signed, ZR, ZR, addr, 16);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, addr, 0);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, addr, 16);
}
else
{
@ -179,112 +179,112 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
}
ABI_PushRegisters(gprs_to_push);
m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT)
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.FCVT(32, 64, D0, RS);
m_float_emit.UMOV(32, W0, Q0, 0);
MOVP2R(X8, &PowerPC::Write_U32);
BLR(X8);
m_float_emit.FCVT(32, 64, ARM64Reg::D0, RS);
m_float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
BLR(ARM64Reg::X8);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.UMOV(32, W0, RS, 0);
MOVP2R(X8, &PowerPC::Write_U32);
BLR(X8);
m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
BLR(ARM64Reg::X8);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
{
m_float_emit.FCVTN(32, D0, RS);
m_float_emit.UMOV(64, X0, D0, 0);
ROR(X0, X0, 32);
MOVP2R(X8, &PowerPC::Write_U64);
BLR(X8);
m_float_emit.FCVTN(32, ARM64Reg::D0, RS);
m_float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::D0, 0);
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
BLR(ARM64Reg::X8);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.UMOV(64, X0, RS, 0);
ROR(X0, X0, 32);
MOVP2R(X8, &PowerPC::Write_U64);
BLR(X8);
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
BLR(ARM64Reg::X8);
}
else
{
MOVP2R(X8, &PowerPC::Write_U64);
m_float_emit.UMOV(64, X0, RS, 0);
BLR(X8);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
BLR(ARM64Reg::X8);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT)
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
MOVP2R(X8, &PowerPC::Read_U32);
BLR(X8);
m_float_emit.INS(32, RS, 0, X0);
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32);
BLR(ARM64Reg::X8);
m_float_emit.INS(32, RS, 0, ARM64Reg::X0);
}
else
{
MOVP2R(X8, &PowerPC::Read_F64);
BLR(X8);
m_float_emit.INS(64, RS, 0, X0);
MOVP2R(ARM64Reg::X8, &PowerPC::Read_F64);
BLR(ARM64Reg::X8);
m_float_emit.INS(64, RS, 0, ARM64Reg::X0);
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
MOV(W0, RS);
MOV(ARM64Reg::W0, RS);
if (flags & BackPatchInfo::FLAG_SIZE_32)
MOVP2R(X8, &PowerPC::Write_U32);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
MOVP2R(X8, &PowerPC::Write_U16);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U16);
else
MOVP2R(X8, &PowerPC::Write_U8);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U8);
BLR(X8);
BLR(ARM64Reg::X8);
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
MOVP2R(X8, &PowerPC::ClearCacheLine);
BLR(X8);
MOVP2R(ARM64Reg::X8, &PowerPC::ClearCacheLine);
BLR(ARM64Reg::X8);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
MOVP2R(X8, &PowerPC::Read_U32);
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
MOVP2R(X8, &PowerPC::Read_U16);
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
MOVP2R(X8, &PowerPC::Read_U8);
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U8);
BLR(X8);
BLR(ARM64Reg::X8);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
MOV(RS, W0);
MOV(RS, ARM64Reg::W0);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
REV32(RS, W0);
REV32(RS, ARM64Reg::W0);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
REV16(RS, W0);
REV16(RS, ARM64Reg::W0);
}
if (flags & BackPatchInfo::FLAG_EXTEND)
SXTH(RS, RS);
}
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
ABI_PopRegisters(gprs_to_push);
}
if (in_far_code)
{
RET(X30);
RET(ARM64Reg::X30);
SwitchToNearCode();
}
}
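
A note on the REV16/REV32/REV64 pairs all through this routine: the emulated PowerPC is big-endian while the AArch64 host is little-endian, so the fastmem paths byte-swap in registers rather than paying for a helper call. REV32 is a plain 32-bit byte reversal:

#include <cstdint>

constexpr uint32_t Swap32(uint32_t v)  // what REV32 does per 32-bit element
{
  return (v >> 24) | ((v >> 8) & 0xFF00u) | ((v << 8) & 0xFF0000u) | (v << 24);
}

static_assert(Swap32(0x12345678u) == 0x78563412u);
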


@ -236,7 +236,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
(inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0;
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = inst.LK ? gpr.GetReg() : INVALID_REG;
ARM64Reg WB = inst.LK ? gpr.GetReg() : ARM64Reg::INVALID_REG;
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR


@ -303,7 +303,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3;
ORR(XA, ZR, 32, 0, true);
ORR(XA, ARM64Reg::ZR, 32, 0, true);
m_float_emit.FCMP(VA, VB);


@ -27,7 +27,7 @@ void JitArm64::ComputeRC0(u64 imm)
gpr.BindCRToRegister(0, false);
MOVI2R(gpr.CR(0), imm);
if (imm & 0x80000000)
SXTW(gpr.CR(0), DecodeReg(gpr.CR(0)));
SXTW(gpr.CR(0), EncodeRegTo32(gpr.CR(0)));
}
void JitArm64::ComputeCarry(ARM64Reg reg)
@ -64,7 +64,7 @@ void JitArm64::ComputeCarry(bool Carry)
return;
}
STRB(IndexType::Unsigned, WSP, PPC_REG, PPCSTATE_OFF(xer_ca));
STRB(IndexType::Unsigned, ARM64Reg::WSP, PPC_REG, PPCSTATE_OFF(xer_ca));
}
void JitArm64::ComputeCarry()
@ -89,7 +89,7 @@ void JitArm64::FlushCarry()
return;
ARM64Reg WA = gpr.GetReg();
CSINC(WA, WSP, WSP, CC_CC);
CSINC(WA, ARM64Reg::WSP, ARM64Reg::WSP, CC_CC);
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
@ -417,7 +417,7 @@ void JitArm64::negx(UGeckoInstruction inst)
else
{
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), WSP, gpr.R(a));
SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a));
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
@ -481,7 +481,7 @@ void JitArm64::cmpl(UGeckoInstruction inst)
if (gpr.IsImm(b) && !gpr.GetImm(b))
{
MOV(DecodeReg(CR), gpr.R(a));
MOV(EncodeRegTo32(CR), gpr.R(a));
return;
}
@ -537,7 +537,7 @@ void JitArm64::cmpli(UGeckoInstruction inst)
if (!B)
{
MOV(DecodeReg(CR), gpr.R(a));
MOV(EncodeRegTo32(CR), gpr.R(a));
return;
}
@ -670,7 +670,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
if (js.op->wantsCA)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg dest = inplace_carry ? WA : WSP;
ARM64Reg dest = inplace_carry ? WA : ARM64Reg::WSP;
if (a != s)
{
ASR(RA, RS, amount);
@ -689,7 +689,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
}
else
{
CSINC(WA, WSP, WSP, CC_EQ);
CSINC(WA, ARM64Reg::WSP, ARM64Reg::WSP, CC_EQ);
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
}
gpr.Unlock(WA);
@ -844,7 +844,7 @@ void JitArm64::addzex(UGeckoInstruction inst)
if (js.carryFlagSet)
{
gpr.BindToRegister(d, d == a);
ADCS(gpr.R(d), gpr.R(a), WZR);
ADCS(gpr.R(d), gpr.R(a), ARM64Reg::WZR);
}
else if (d == a)
{
@ -913,7 +913,7 @@ void JitArm64::subfex(UGeckoInstruction inst)
if (js.carryFlagSet)
{
MOVI2R(WA, ~i + j);
ADC(gpr.R(d), WA, WZR);
ADC(gpr.R(d), WA, ARM64Reg::WZR);
}
else
{
@ -1011,7 +1011,7 @@ void JitArm64::subfzex(UGeckoInstruction inst)
if (js.carryFlagSet)
{
MVN(gpr.R(d), gpr.R(a));
ADCS(gpr.R(d), gpr.R(d), WZR);
ADCS(gpr.R(d), gpr.R(d), ARM64Reg::WZR);
}
else
{
@ -1074,7 +1074,7 @@ void JitArm64::addex(UGeckoInstruction inst)
if (js.carryFlagSet)
{
MOVI2R(WA, i + j);
ADC(gpr.R(d), WA, WZR);
ADC(gpr.R(d), WA, ARM64Reg::WZR);
}
else
{
@ -1503,7 +1503,7 @@ void JitArm64::srawx(UGeckoInstruction inst)
SetJumpTarget(bit_is_not_zero);
CMP(RS, 0);
CSET(WA, CC_LT);
CSINV(WB, WZR, WZR, CC_GE);
CSINV(WB, ARM64Reg::WZR, ARM64Reg::WZR, CC_GE);
SetJumpTarget(is_zero);
SetJumpTarget(bit_is_zero);


@ -25,12 +25,12 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(W0, W30);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg);
ARM64Reg dest_reg = gpr.R(dest);
ARM64Reg up_reg = INVALID_REG;
ARM64Reg off_reg = INVALID_REG;
ARM64Reg up_reg = ARM64Reg::INVALID_REG;
ARM64Reg off_reg = ARM64Reg::INVALID_REG;
if (addr != -1 && !gpr.IsImm(addr))
up_reg = gpr.R(addr);
@ -38,7 +38,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);
ARM64Reg addr_reg = W0;
ARM64Reg addr_reg = ARM64Reg::W0;
u32 imm_addr = 0;
bool is_immediate = false;
@ -113,8 +113,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[dest_reg] = 0;
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -135,18 +135,18 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, dest_reg, XA, regs_in_use, fprs_in_use);
}
gpr.Unlock(W0, W30);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
}
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(W0, W1, W30);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
ARM64Reg RS = gpr.R(value);
ARM64Reg reg_dest = INVALID_REG;
ARM64Reg reg_off = INVALID_REG;
ARM64Reg reg_dest = ARM64Reg::INVALID_REG;
ARM64Reg reg_off = ARM64Reg::INVALID_REG;
if (regOffset != -1 && !gpr.IsImm(regOffset))
reg_off = gpr.R(regOffset);
@ -155,10 +155,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
ARM64Reg addr_reg = W1;
ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
bool is_immediate = false;
@ -238,22 +238,22 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else
accessSize = 8;
LDR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
if (accessSize == 32)
{
REV32(W1, RS);
STR(IndexType::Post, W1, X0, 4);
REV32(ARM64Reg::W1, RS);
STR(IndexType::Post, ARM64Reg::W1, ARM64Reg::X0, 4);
}
else if (accessSize == 16)
{
REV16(W1, RS);
STRH(IndexType::Post, W1, X0, 2);
REV16(ARM64Reg::W1, RS);
STRH(IndexType::Post, ARM64Reg::W1, ARM64Reg::X0, 2);
}
else
{
STRB(IndexType::Post, RS, X0, 1);
STRB(IndexType::Post, RS, ARM64Reg::X0, 1);
}
STR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
}
else if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
@ -274,7 +274,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, RS, XA, regs_in_use, fprs_in_use);
}
gpr.Unlock(W0, W1, W30);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
}
void JitArm64::lXX(UGeckoInstruction inst)
@ -538,9 +538,9 @@ void JitArm64::dcbx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreOff);
gpr.Lock(W0);
gpr.Lock(ARM64Reg::W0);
ARM64Reg addr = W0;
ARM64Reg addr = ARM64Reg::W0;
u32 a = inst.RA, b = inst.RB;
@ -555,17 +555,17 @@ void JitArm64::dcbx(UGeckoInstruction inst)
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
ABI_PushRegisters(gprs_to_push);
m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
MOVI2R(X1, 32);
MOVI2R(X2, 0);
MOVP2R(X3, &JitInterface::InvalidateICache);
BLR(X3);
MOVI2R(ARM64Reg::X1, 32);
MOVI2R(ARM64Reg::X2, 0);
MOVP2R(ARM64Reg::X3, &JitInterface::InvalidateICache);
BLR(ARM64Reg::X3);
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
ABI_PopRegisters(gprs_to_push);
gpr.Unlock(W0);
gpr.Unlock(ARM64Reg::W0);
}
void JitArm64::dcbt(UGeckoInstruction inst)
@ -596,9 +596,9 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(W0);
gpr.Lock(ARM64Reg::W0);
ARM64Reg addr_reg = W0;
ARM64Reg addr_reg = ARM64Reg::W0;
if (a)
{
@ -645,12 +645,12 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[W0] = 0;
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, W0, EncodeRegTo64(addr_reg),
gprs_to_push, fprs_to_push);
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, ARM64Reg::W0,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
gpr.Unlock(W0);
gpr.Unlock(ARM64Reg::W0);
}
void JitArm64::eieio(UGeckoInstruction inst)


@ -78,11 +78,11 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_F64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(W0, W30);
fpr.Lock(Q0);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
const ARM64Reg VD = fpr.RW(inst.FD, type);
ARM64Reg addr_reg = W0;
ARM64Reg addr_reg = ARM64Reg::W0;
if (update)
{
@ -164,9 +164,9 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
fprs_in_use[0] = 0; // Q0
fprs_in_use[VD - Q0] = 0;
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
fprs_in_use[DecodeReg(VD)] = 0;
if (jo.fastmem_arena && is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
{
@ -177,8 +177,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VD, XA, regs_in_use, fprs_in_use);
}
gpr.Unlock(W0, W30);
fpr.Unlock(Q0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
}
void JitArm64::stfXX(UGeckoInstruction inst)
@ -242,8 +242,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
gpr.Lock(W0, W1, W30);
fpr.Lock(Q0);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
const bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
@ -255,7 +255,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
flags |= BackPatchInfo::FLAG_SIZE_F32I;
}
ARM64Reg addr_reg = W1;
ARM64Reg addr_reg = ARM64Reg::W1;
if (update)
{
@ -344,9 +344,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[W0] = 0;
regs_in_use[W1] = 0;
fprs_in_use[0] = 0; // Q0
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (is_immediate)
{
@ -358,25 +358,25 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else
accessSize = 32;
LDR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
if (flags & BackPatchInfo::FLAG_SIZE_F64)
{
m_float_emit.REV64(8, Q0, V0);
m_float_emit.REV64(8, ARM64Reg::Q0, V0);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0));
m_float_emit.REV32(8, D0, D0);
m_float_emit.FCVT(32, 64, ARM64Reg::D0, EncodeRegToDouble(V0));
m_float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.REV32(8, D0, V0);
m_float_emit.REV32(8, ARM64Reg::D0, V0);
}
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? Q0 : D0, X0,
accessSize >> 3);
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,
ARM64Reg::X0, accessSize >> 3);
STR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
js.fifoBytesSinceCheck += accessSize >> 3;
if (update)
@ -399,6 +399,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
{
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, V0, XA, regs_in_use, fprs_in_use);
}
gpr.Unlock(W0, W1, W30);
fpr.Unlock(Q0);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
}


@ -34,13 +34,13 @@ void JitArm64::psq_l(UGeckoInstruction inst)
const bool update = inst.OPCD == 57;
const s32 offset = inst.SIMM_12;
gpr.Lock(W0, W1, W2, W30);
fpr.Lock(Q0, Q1);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const ARM64Reg arm_addr = gpr.R(inst.RA);
constexpr ARM64Reg scale_reg = W0;
constexpr ARM64Reg addr_reg = W1;
constexpr ARM64Reg type_reg = W2;
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2;
ARM64Reg VS;
if (inst.RA || update) // Always uses the register on update
@ -81,22 +81,22 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale
MOVP2R(X30, inst.W ? single_load_quantized : paired_load_quantized);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
MOVP2R(ARM64Reg::X30, inst.W ? single_load_quantized : paired_load_quantized);
LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));
VS = fpr.RW(inst.RS, RegType::Single);
m_float_emit.ORR(EncodeRegToDouble(VS), D0, D0);
m_float_emit.ORR(EncodeRegToDouble(VS), ARM64Reg::D0, ARM64Reg::D0);
}
if (inst.W)
{
m_float_emit.FMOV(S0, 0x70); // 1.0 as a Single
m_float_emit.INS(32, VS, 1, Q0, 0);
m_float_emit.FMOV(ARM64Reg::S0, 0x70); // 1.0 as a Single
m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0);
}
gpr.Unlock(W0, W1, W2, W30);
fpr.Unlock(Q0, Q1);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
void JitArm64::psq_st(UGeckoInstruction inst)
@ -116,17 +116,17 @@ void JitArm64::psq_st(UGeckoInstruction inst)
const bool update = inst.OPCD == 61;
const s32 offset = inst.SIMM_12;
gpr.Lock(W0, W1, W2, W30);
fpr.Lock(Q0, Q1);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const bool single = fpr.IsSingle(inst.RS);
const ARM64Reg arm_addr = gpr.R(inst.RA);
const ARM64Reg VS = fpr.R(inst.RS, single ? RegType::Single : RegType::Register);
constexpr ARM64Reg scale_reg = W0;
constexpr ARM64Reg addr_reg = W1;
constexpr ARM64Reg type_reg = W2;
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg type_reg = ARM64Reg::W2;
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
@ -169,14 +169,14 @@ void JitArm64::psq_st(UGeckoInstruction inst)
{
if (single)
{
m_float_emit.ORR(D0, VS, VS);
m_float_emit.ORR(ARM64Reg::D0, VS, VS);
}
else
{
if (inst.W)
m_float_emit.FCVT(32, 64, D0, VS);
m_float_emit.FCVT(32, 64, ARM64Reg::D0, VS);
else
m_float_emit.FCVTN(32, D0, VS);
m_float_emit.FCVTN(32, ARM64Reg::D0, VS);
}
LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + inst.I));
@ -192,26 +192,26 @@ void JitArm64::psq_st(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(fail);
// Slow
MOVP2R(X30, &paired_store_quantized[16 + inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
MOVP2R(ARM64Reg::X30, &paired_store_quantized[16 + inst.W * 8]);
LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true));
ABI_PushRegisters(gprs_in_use);
m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
BLR(EncodeRegTo64(type_reg));
m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
ABI_PopRegisters(gprs_in_use);
FixupBranch continue1 = B();
SwitchToNearCode();
SetJumpTarget(pass);
// Fast
MOVP2R(X30, &paired_store_quantized[inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
MOVP2R(ARM64Reg::X30, &paired_store_quantized[inst.W * 8]);
LDR(EncodeRegTo64(type_reg), ARM64Reg::X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));
SetJumpTarget(continue1);
}
gpr.Unlock(W0, W1, W2, W30);
fpr.Unlock(Q0, Q1);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}


@ -123,8 +123,8 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
const ARM64Reg VB = reg_encoder(fpr.R(b, type));
const ARM64Reg VC = reg_encoder(fpr.R(c, type));
const ARM64Reg VD = reg_encoder(fpr.RW(d, type));
ARM64Reg V0Q = INVALID_REG;
ARM64Reg V0 = INVALID_REG;
ARM64Reg V0Q = ARM64Reg::INVALID_REG;
ARM64Reg V0 = ARM64Reg::INVALID_REG;
if (d != b && (d == a || d == c))
{
V0Q = fpr.GetReg();
@ -262,7 +262,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
fpr.FixSinglePrecision(d);
if (V0Q != INVALID_REG)
if (V0Q != ARM64Reg::INVALID_REG)
fpr.Unlock(V0Q);
}


@ -42,7 +42,7 @@ ARM64Reg Arm64RegCache::GetReg()
// We can't return anything reasonable in this case. Return INVALID_REG and watch the failure
// happen
ASSERT_MSG(DYNA_REC, 0, "All available registers are locked!");
return INVALID_REG;
return ARM64Reg::INVALID_REG;
}
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
@ -122,17 +122,17 @@ void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats)
bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) const
{
static constexpr std::array<ARM64Reg, 11> callee_regs{{
X28,
X27,
X26,
X25,
X24,
X23,
X22,
X21,
X20,
X19,
INVALID_REG,
ARM64Reg::X28,
ARM64Reg::X27,
ARM64Reg::X26,
ARM64Reg::X25,
ARM64Reg::X24,
ARM64Reg::X23,
ARM64Reg::X22,
ARM64Reg::X21,
ARM64Reg::X20,
ARM64Reg::X19,
ARM64Reg::INVALID_REG,
}};
return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
@ -180,7 +180,7 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
if (!maintain_state)
{
UnlockRegister(DecodeReg(host_reg));
UnlockRegister(EncodeRegTo32(host_reg));
reg.Flush();
}
}
@ -188,7 +188,7 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
{
if (!reg.GetImm())
{
m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ZR : WZR, PPC_REG,
m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG,
u32(guest_reg.ppc_offset));
}
else
@ -198,7 +198,7 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
m_emit->MOVI2R(host_reg, reg.GetImm());
m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
UnlockRegister(DecodeReg(host_reg));
UnlockRegister(EncodeRegTo32(host_reg));
}
if (!maintain_state)
@ -228,8 +228,8 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset));
if (!maintain_state)
{
UnlockRegister(DecodeReg(RX1));
UnlockRegister(DecodeReg(RX2));
UnlockRegister(EncodeRegTo32(RX1));
UnlockRegister(EncodeRegTo32(RX2));
reg1.Flush();
reg2.Flush();
}
@ -299,14 +299,14 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
break;
}
// We've got an issue if we end up here
return INVALID_REG;
return ARM64Reg::INVALID_REG;
}
void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm)
{
OpArg& reg = guest_reg.reg;
if (reg.GetType() == RegType::Register)
UnlockRegister(DecodeReg(reg.GetReg()));
UnlockRegister(EncodeRegTo32(reg.GetReg()));
reg.LoadToImm(imm);
}
@ -332,36 +332,36 @@ void Arm64GPRCache::GetAllocationOrder()
// Callee saved registers first in hopes that we will keep everything stored there first
static constexpr std::array<ARM64Reg, 29> allocation_order{{
// Callee saved
W27,
W26,
W25,
W24,
W23,
W22,
W21,
W20,
W19,
ARM64Reg::W27,
ARM64Reg::W26,
ARM64Reg::W25,
ARM64Reg::W24,
ARM64Reg::W23,
ARM64Reg::W22,
ARM64Reg::W21,
ARM64Reg::W20,
ARM64Reg::W19,
// Caller saved
W17,
W16,
W15,
W14,
W13,
W12,
W11,
W10,
W9,
W8,
W7,
W6,
W5,
W4,
W3,
W2,
W1,
W0,
W30,
ARM64Reg::W17,
ARM64Reg::W16,
ARM64Reg::W15,
ARM64Reg::W14,
ARM64Reg::W13,
ARM64Reg::W12,
ARM64Reg::W11,
ARM64Reg::W10,
ARM64Reg::W9,
ARM64Reg::W8,
ARM64Reg::W7,
ARM64Reg::W6,
ARM64Reg::W5,
ARM64Reg::W4,
ARM64Reg::W3,
ARM64Reg::W2,
ARM64Reg::W1,
ARM64Reg::W0,
ARM64Reg::W30,
}};
for (ARM64Reg reg : allocation_order)
@ -381,11 +381,10 @@ BitSet32 Arm64GPRCache::GetCallerSavedUsed() const
void Arm64GPRCache::FlushByHost(ARM64Reg host_reg)
{
host_reg = DecodeReg(host_reg);
for (size_t i = 0; i < m_guest_registers.size(); ++i)
{
const OpArg& reg = m_guest_registers[i];
if (reg.GetType() == RegType::Register && DecodeReg(reg.GetReg()) == host_reg)
if (reg.GetType() == RegType::Register && DecodeReg(reg.GetReg()) == DecodeReg(host_reg))
{
FlushRegister(i, false);
return;
@ -520,7 +519,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
break;
}
// We've got an issue if we end up here
return INVALID_REG;
return ARM64Reg::INVALID_REG;
}
ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
@ -589,40 +588,40 @@ void Arm64FPRCache::GetAllocationOrder()
{
static constexpr std::array<ARM64Reg, 32> allocation_order{{
// Callee saved
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15,
ARM64Reg::Q8,
ARM64Reg::Q9,
ARM64Reg::Q10,
ARM64Reg::Q11,
ARM64Reg::Q12,
ARM64Reg::Q13,
ARM64Reg::Q14,
ARM64Reg::Q15,
// Caller saved
Q16,
Q17,
Q18,
Q19,
Q20,
Q21,
Q22,
Q23,
Q24,
Q25,
Q26,
Q27,
Q28,
Q29,
Q30,
Q31,
Q7,
Q6,
Q5,
Q4,
Q3,
Q2,
Q1,
Q0,
ARM64Reg::Q16,
ARM64Reg::Q17,
ARM64Reg::Q18,
ARM64Reg::Q19,
ARM64Reg::Q20,
ARM64Reg::Q21,
ARM64Reg::Q22,
ARM64Reg::Q23,
ARM64Reg::Q24,
ARM64Reg::Q25,
ARM64Reg::Q26,
ARM64Reg::Q27,
ARM64Reg::Q28,
ARM64Reg::Q29,
ARM64Reg::Q30,
ARM64Reg::Q31,
ARM64Reg::Q7,
ARM64Reg::Q6,
ARM64Reg::Q5,
ARM64Reg::Q4,
ARM64Reg::Q3,
ARM64Reg::Q2,
ARM64Reg::Q1,
ARM64Reg::Q0,
}};
for (ARM64Reg reg : allocation_order)
@ -648,15 +647,15 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg)
bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg) const
{
static constexpr std::array<ARM64Reg, 9> callee_regs{{
Q8,
Q9,
Q10,
Q11,
Q12,
Q13,
Q14,
Q15,
INVALID_REG,
ARM64Reg::Q8,
ARM64Reg::Q9,
ARM64Reg::Q10,
ARM64Reg::Q11,
ARM64Reg::Q12,
ARM64Reg::Q13,
ARM64Reg::Q14,
ARM64Reg::Q15,
ARM64Reg::INVALID_REG,
}};
return std::find(callee_regs.begin(), callee_regs.end(), reg) != callee_regs.end();
@ -745,7 +744,7 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() const
for (const auto& it : m_host_registers)
{
if (it.IsLocked())
registers[it.GetReg() - Q0] = true;
registers[DecodeReg(it.GetReg())] = true;
}
return registers;
}


@ -18,11 +18,11 @@
// Dedicated host registers
// memory base register
constexpr Arm64Gen::ARM64Reg MEM_REG = Arm64Gen::X28;
constexpr Arm64Gen::ARM64Reg MEM_REG = Arm64Gen::ARM64Reg::X28;
// ppcState pointer
constexpr Arm64Gen::ARM64Reg PPC_REG = Arm64Gen::X29;
constexpr Arm64Gen::ARM64Reg PPC_REG = Arm64Gen::ARM64Reg::X29;
// PC register when calling the dispatcher
constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::W26;
constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26;
#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
@ -84,13 +84,13 @@ public:
m_type = RegType::Immediate;
m_value = imm;
m_reg = Arm64Gen::INVALID_REG;
m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
}
void Flush()
{
// Invalidate any previous information
m_type = RegType::NotLoaded;
m_reg = Arm64Gen::INVALID_REG;
m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
// Arbitrarily large value that won't roll over on a lot of increments
m_last_used = 0xFFFF;
@ -104,8 +104,8 @@ public:
private:
// For REG_REG
RegType m_type = RegType::NotLoaded; // store type
Arm64Gen::ARM64Reg m_reg = Arm64Gen::INVALID_REG; // host register we are in
RegType m_type = RegType::NotLoaded; // store type
Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; // host register we are in
// For REG_IMM
u32 m_value = 0; // IMM value
@ -130,7 +130,7 @@ public:
bool operator!=(Arm64Gen::ARM64Reg reg) const { return !operator==(reg); }
private:
Arm64Gen::ARM64Reg m_reg = Arm64Gen::INVALID_REG;
Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
bool m_locked = false;
};


@ -17,7 +17,7 @@ using namespace Arm64Gen;
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
ARM64Reg XA = gpr.CR(field);
ARM64Reg WA = DecodeReg(XA);
ARM64Reg WA = EncodeRegTo32(XA);
switch (bit)
{
@ -26,7 +26,7 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
return jump_if_set ? CBZ(WA) : CBNZ(WA);
case PowerPC::CR_GT_BIT: // check val > 0
CMP(XA, SP);
CMP(XA, ARM64Reg::SP);
return B(jump_if_set ? CC_GT : CC_LE);
case PowerPC::CR_LT_BIT: // check bit 62 set
return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
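
This switch documents Dolphin's packed condition-register representation: each PowerPC CR field lives in one 64-bit host value, with SO in bit 61, LT in bit 62, EQ encoded as "low 32 bits are zero", and GT as "positive when read as signed 64-bit". The same predicates in plain C++ (a sketch read off this switch and the crXXX cases below, not Dolphin's actual helpers):

#include <cstdint>

constexpr bool GetSO(uint64_t cr) { return (cr & (1ull << 61)) != 0; }
constexpr bool GetEQ(uint64_t cr) { return static_cast<uint32_t>(cr) == 0; }
constexpr bool GetGT(uint64_t cr) { return static_cast<int64_t>(cr) > 0; }
constexpr bool GetLT(uint64_t cr) { return (cr & (1ull << 62)) != 0; }
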
@ -84,7 +84,7 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XB = gpr.CR(inst.CRFD);
ARM64Reg WB = DecodeReg(XB);
ARM64Reg WB = EncodeRegTo32(XB);
// Copy XER[0-3] into CR[inst.CRFD]
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
@ -99,8 +99,8 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
LDR(XB, XB, XA);
// Clear XER[0-3]
STRB(IndexType::Unsigned, WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
STRB(IndexType::Unsigned, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));
STRB(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
STRB(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(WA);
}
@ -278,7 +278,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
SUB(Xresult, Xresult, XB);
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
ORRI2R(XB, ZR, 0xAAAAAAAAAAAAAAAA);
ORRI2R(XB, ARM64Reg::ZR, 0xAAAAAAAAAAAAAAAA);
ADD(XB, XB, 1);
UMULH(Xresult, Xresult, XB);
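
The magic constant is the standard reciprocal for unsigned division by 12: 0xAAAAAAAAAAAAAAAB = ceil(2^67 / 12), and UMULH yields bits [127:64] of the product, so ">> 67" is UMULH followed by a right shift of 3 (the shift falls outside the lines shown). A compile-time check of the identity (relies on the GCC/Clang 128-bit type):

#include <cstdint>

constexpr uint64_t DivBy12(uint64_t a)
{
  const unsigned __int128 m = 0xAAAAAAAAAAAAAAABull;           // ceil(2^67 / 12)
  const uint64_t high = static_cast<uint64_t>((a * m) >> 64);  // UMULH
  return high >> 3;
}

static_assert(DivBy12(0) == 0);
static_assert(DivBy12(35) == 2);
static_assert(DivBy12(0xFFFFFFFFFFFFFFFFull) == 0xFFFFFFFFFFFFFFFFull / 12);
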
@ -461,7 +461,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WB = gpr.GetReg();
ARM64Reg XB = EncodeRegTo64(WB);
ORR(XB, XA, 64 - 63, 0, true); // XA | 1<<63
CMP(XA, ZR);
CMP(XA, ARM64Reg::ZR);
CSEL(XA, XA, XB, CC_NEQ);
gpr.Unlock(WB);
}
@ -509,7 +509,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
bool negate = i ? negateB : negateA;
ARM64Reg XC = gpr.CR(field);
ARM64Reg WC = DecodeReg(XC);
ARM64Reg WC = EncodeRegTo32(XC);
switch (bit)
{
case PowerPC::CR_SO_BIT: // check bit 61 set
@ -519,12 +519,12 @@ void JitArm64::crXXX(UGeckoInstruction inst)
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
CMP(WC, WZR);
CMP(WC, ARM64Reg::WZR);
CSET(out, negate ? CC_NEQ : CC_EQ);
break;
case PowerPC::CR_GT_BIT: // check val > 0
CMP(XC, ZR);
CMP(XC, ARM64Reg::ZR);
CSET(out, negate ? CC_LE : CC_GT);
break;
@ -565,7 +565,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int bit = 3 - (inst.CRBD & 3);
gpr.Unlock(WB);
WB = INVALID_REG;
WB = ARM64Reg::INVALID_REG;
gpr.BindCRToRegister(field, true);
XB = gpr.CR(field);
@ -577,7 +577,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WC = gpr.GetReg();
ARM64Reg XC = EncodeRegTo64(WC);
ORR(XC, XB, 64 - 63, 0, true); // XB | 1<<63
CMP(XB, ZR);
CMP(XB, ARM64Reg::ZR);
CSEL(XB, XB, XC, CC_NEQ);
gpr.Unlock(WC);
}
@ -623,7 +623,7 @@ void JitArm64::mfcr(UGeckoInstruction inst)
for (int i = 0; i < 8; i++)
{
ARM64Reg CR = gpr.CR(i);
ARM64Reg WCR = DecodeReg(CR);
ARM64Reg WCR = EncodeRegTo32(CR);
// SO
if (i == 0)
@ -638,12 +638,12 @@ void JitArm64::mfcr(UGeckoInstruction inst)
// EQ
ORR(WC, WA, 32 - 1, 0); // WA | 1<<1
CMP(WCR, WZR);
CMP(WCR, ARM64Reg::WZR);
CSEL(WA, WC, WA, CC_EQ);
// GT
ORR(WC, WA, 32 - 2, 0); // WA | 1<<2
CMP(CR, ZR);
CMP(CR, ARM64Reg::ZR);
CSEL(WA, WC, WA, CC_GT);
// LT
@ -672,7 +672,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
{
gpr.BindCRToRegister(i, false);
ARM64Reg CR = gpr.CR(i);
ARM64Reg WCR = DecodeReg(CR);
ARM64Reg WCR = EncodeRegTo32(CR);
if (i != 7)
LSR(WCR, RS, 28 - i * 4);


@ -28,27 +28,27 @@ void JitArm64::GenerateAsm()
enter_code = GetCodePtr();
ABI_PushRegisters(regs_to_save);
m_float_emit.ABI_PushRegisters(regs_to_save_fpr, X30);
m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X30);
MOVP2R(PPC_REG, &PowerPC::ppcState);
// Swap the stack pointer, so we have proper guard pages.
ADD(X0, SP, 0);
MOVP2R(X1, &m_saved_stack_pointer);
STR(IndexType::Unsigned, X0, X1, 0);
MOVP2R(X1, &m_stack_pointer);
LDR(IndexType::Unsigned, X0, X1, 0);
FixupBranch no_fake_stack = CBZ(X0);
ADD(SP, X0, 0);
ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
MOVP2R(ARM64Reg::X1, &m_stack_pointer);
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
SetJumpTarget(no_fake_stack);
// Push {nullptr; -1} as invalid destination on the stack.
MOVI2R(X0, 0xFFFFFFFF);
STP(IndexType::Pre, ZR, X0, SP, -16);
MOVI2R(ARM64Reg::X0, 0xFFFFFFFF);
STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16);
// Store the stack pointer, so we can reset it if the BLR optimization fails.
ADD(X0, SP, 0);
STR(IndexType::Unsigned, X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
// The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().
// Advance() does an exception check so we don't know what PC to use until afterwards.
@ -93,18 +93,18 @@ void JitArm64::GenerateAsm()
SetJumpTarget(membaseend);
// iCache[(address >> 2) & iCache_Mask];
ARM64Reg pc_masked = W25;
ARM64Reg cache_base = X27;
ARM64Reg block = X30;
ORRI2R(pc_masked, WZR, JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3);
ARM64Reg pc_masked = ARM64Reg::W25;
ARM64Reg cache_base = ARM64Reg::X27;
ARM64Reg block = ARM64Reg::X30;
ORRI2R(pc_masked, ARM64Reg::WZR, JitBaseBlockCache::FAST_BLOCK_MAP_MASK << 3);
AND(pc_masked, pc_masked, DISPATCHER_PC, ArithOption(DISPATCHER_PC, ShiftType::LSL, 1));
MOVP2R(cache_base, GetBlockCache()->GetFastBlockMap());
LDR(block, cache_base, EncodeRegTo64(pc_masked));
FixupBranch not_found = CBZ(block);
// b.effectiveAddress != addr || b.msrBits != msr
ARM64Reg pc_and_msr = W25;
ARM64Reg pc_and_msr2 = W24;
ARM64Reg pc_and_msr = ARM64Reg::W25;
ARM64Reg pc_and_msr2 = ARM64Reg::W24;
LDR(IndexType::Unsigned, pc_and_msr, block, offsetof(JitBlockData, effectiveAddress));
CMP(pc_and_msr, DISPATCHER_PC);
FixupBranch pc_missmatch = B(CC_NEQ);
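
In C terms, this fast path indexes a table of 8-byte block pointers: (pc << 1) & (FAST_BLOCK_MAP_MASK << 3) is exactly the byte offset of entry (pc >> 2) & FAST_BLOCK_MAP_MASK. A model of the lookup (field names from this hunk; the msrBits comparison is emitted just past the visible lines):

#include <cstddef>
#include <cstdint>

struct BlockSketch  // only the fields the fast path inspects
{
  uint32_t effectiveAddress;
  uint32_t msrBits;
};

BlockSketch* FastLookup(BlockSketch* const* map, std::size_t mask, uint32_t pc,
                        uint32_t msr)
{
  BlockSketch* block = map[(pc >> 2) & mask];
  if (block == nullptr || block->effectiveAddress != pc || block->msrBits != msr)
    return nullptr;  // fall back to the C++ Dispatch() call below
  return block;
}
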
@ -125,28 +125,28 @@ void JitArm64::GenerateAsm()
// Call C version of Dispatch().
STR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVP2R(X8, reinterpret_cast<void*>(&JitBase::Dispatch));
MOVP2R(X0, this);
BLR(X8);
MOVP2R(ARM64Reg::X8, reinterpret_cast<void*>(&JitBase::Dispatch));
MOVP2R(ARM64Reg::X0, this);
BLR(ARM64Reg::X8);
FixupBranch no_block_available = CBZ(X0);
FixupBranch no_block_available = CBZ(ARM64Reg::X0);
// set the mem_base based on MSR flags and jump to next block.
LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVP2R(MEM_REG, Memory::physical_base);
BR(X0);
BR(ARM64Reg::X0);
SetJumpTarget(physmem);
MOVP2R(MEM_REG, Memory::logical_base);
BR(X0);
BR(ARM64Reg::X0);
// Call JIT
SetJumpTarget(no_block_available);
ResetStack();
MOVP2R(X0, this);
MOV(W1, DISPATCHER_PC);
MOVP2R(X8, reinterpret_cast<void*>(&JitTrampoline));
BLR(X8);
MOVP2R(ARM64Reg::X0, this);
MOV(ARM64Reg::W1, DISPATCHER_PC);
MOVP2R(ARM64Reg::X8, reinterpret_cast<void*>(&JitTrampoline));
BLR(ARM64Reg::X8);
LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
B(dispatcher_no_check);
@ -158,15 +158,15 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting
// Gets checked on at the end of every slice
MOVP2R(X0, CPU::GetStatePtr());
LDR(IndexType::Unsigned, W0, X0, 0);
MOVP2R(ARM64Reg::X0, CPU::GetStatePtr());
LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
CMP(W0, 0);
CMP(ARM64Reg::W0, 0);
FixupBranch Exit = B(CC_NEQ);
SetJumpTarget(to_start_of_timing_slice);
MOVP2R(X8, &CoreTiming::Advance);
BLR(X8);
MOVP2R(ARM64Reg::X8, &CoreTiming::Advance);
BLR(ARM64Reg::X8);
// Load the PC back into DISPATCHER_PC (the exception handler might have changed it)
LDR(IndexType::Unsigned, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
@ -177,13 +177,13 @@ void JitArm64::GenerateAsm()
SetJumpTarget(Exit);
// Reset the stack pointer, as the BLR optimization have touched it.
MOVP2R(X1, &m_saved_stack_pointer);
LDR(IndexType::Unsigned, X0, X1, 0);
ADD(SP, X0, 0);
MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
m_float_emit.ABI_PopRegisters(regs_to_save_fpr, X30);
m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
ABI_PopRegisters(regs_to_save);
RET(X30);
RET(ARM64Reg::X30);
JitRegister::Register(enter_code, GetCodePtr(), "JIT_Dispatcher");
@ -201,8 +201,8 @@ void JitArm64::GenerateCommonAsm()
// Q0 is the return for loads
// is the register for stores
// Q1 is a temporary
ARM64Reg addr_reg = X1;
ARM64Reg scale_reg = X0;
ARM64Reg addr_reg = ARM64Reg::X1;
ARM64Reg scale_reg = ARM64Reg::X0;
ARM64FloatEmitter float_emit(this);
const u8* start = GetCodePtr();
@ -211,129 +211,129 @@ void JitArm64::GenerateCommonAsm()
const u8* loadPairedFloatTwo = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(32, 1, D0, addr_reg);
float_emit.REV32(8, D0, D0);
RET(X30);
float_emit.LD1(32, 1, ARM64Reg::D0, addr_reg);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
RET(ARM64Reg::X30);
}
const u8* loadPairedU8Two = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.UXTL(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
float_emit.LDR(16, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedS8Two = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.SXTL(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
float_emit.LDR(16, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedU16Two = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(16, 1, D0, addr_reg);
float_emit.REV16(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
float_emit.LD1(16, 1, ARM64Reg::D0, addr_reg);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedS16Two = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(16, 1, D0, addr_reg);
float_emit.REV16(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
float_emit.LD1(16, 1, ARM64Reg::D0, addr_reg);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedFloatOne = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(32, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.REV32(8, D0, D0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
RET(ARM64Reg::X30);
}
const u8* loadPairedU8One = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(8, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.UXTL(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
float_emit.LDR(8, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedS8One = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(8, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.SXTL(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
float_emit.LDR(8, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedU16One = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.REV16(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
float_emit.LDR(16, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
const u8* loadPairedS16One = GetCodePtr();
{
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, IndexType::Unsigned, D0, addr_reg, 0);
float_emit.REV16(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
float_emit.LDR(16, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
RET(X30);
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
RET(ARM64Reg::X30);
}
JitRegister::Register(start, GetCodePtr(), "JIT_QuantizedLoad");
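
Each loadPaired helper above is a vectorized form of the same scalar recipe: byte-swap, widen, convert to float, then scale by the dequantization table. A minimal scalar sketch, assuming m_dequantizeTableS is a flat float array with two entries per scale index (which is what the LSL-3 addressing above implies):

#include <cstdint>

// Hypothetical scalar equivalent of loadPairedU8Two (names assumed):
void DequantizeU8Pair(const uint8_t* src, uint32_t scale,
                      const float* dequantize_table, float out[2])
{
  const float dequant = dequantize_table[scale * 2];  // LDR 32 via scale_reg
  out[0] = src[0] * dequant;  // UXTL, UXTL, UCVTF, then FMUL by element
  out[1] = src[1] * dequant;
}
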
@ -370,245 +370,245 @@ void JitArm64::GenerateCommonAsm()
const u8* storePairedFloatSlow;
{
storePairedFloat = GetCodePtr();
float_emit.REV32(8, D0, D0);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(64, Q0, 0, addr_reg, SP);
RET(X30);
float_emit.ST1(64, ARM64Reg::Q0, 0, addr_reg, ARM64Reg::SP);
RET(ARM64Reg::X30);
storePairedFloatSlow = GetCodePtr();
float_emit.UMOV(64, X0, Q0, 0);
ROR(X0, X0, 32);
MOVP2R(X2, &PowerPC::Write_U64);
BR(X2);
float_emit.UMOV(64, ARM64Reg::X0, ARM64Reg::Q0, 0);
ROR(ARM64Reg::X0, ARM64Reg::X0, 32);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U64);
BR(ARM64Reg::X2);
}
const u8* storePairedU8;
const u8* storePairedU8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, D0, D0);
float_emit.UQXTN(16, D0, D0);
float_emit.UQXTN(8, D0, D0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(8, ARM64Reg::D0, ARM64Reg::D0);
};
storePairedU8 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(16, Q0, 0, addr_reg, SP);
RET(X30);
float_emit.ST1(16, ARM64Reg::Q0, 0, addr_reg, ARM64Reg::SP);
RET(ARM64Reg::X30);
storePairedU8Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0);
REV16(ARM64Reg::W0, ARM64Reg::W0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16);
BR(ARM64Reg::X2);
}
const u8* storePairedS8;
const u8* storePairedS8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, D0, D0);
float_emit.SQXTN(16, D0, D0);
float_emit.SQXTN(8, D0, D0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(8, ARM64Reg::D0, ARM64Reg::D0);
};
storePairedS8 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(16, Q0, 0, addr_reg, SP);
RET(X30);
float_emit.ST1(16, ARM64Reg::Q0, 0, addr_reg, ARM64Reg::SP);
RET(ARM64Reg::X30);
storePairedS8Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0);
REV16(ARM64Reg::W0, ARM64Reg::W0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16);
BR(ARM64Reg::X2);
}
const u8* storePairedU16;
const u8* storePairedU16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZU(32, D0, D0);
float_emit.UQXTN(16, D0, D0);
float_emit.REV16(8, D0, D0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
};
storePairedU16 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(32, Q0, 0, addr_reg, SP);
RET(X30);
float_emit.ST1(32, ARM64Reg::Q0, 0, addr_reg, ARM64Reg::SP);
RET(ARM64Reg::X30);
storePairedU16Slow = GetCodePtr();
emit_quantize();
float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32);
BR(ARM64Reg::X2);
}
const u8* storePairedS16; // Used by Viewtiful Joe's intro movie
const u8* storePairedS16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
float_emit.FCVTZS(32, D0, D0);
float_emit.SQXTN(16, D0, D0);
float_emit.REV16(8, D0, D0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
};
storePairedS16 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(32, Q0, 0, addr_reg, SP);
RET(X30);
float_emit.ST1(32, ARM64Reg::Q0, 0, addr_reg, ARM64Reg::SP);
RET(ARM64Reg::X30);
storePairedS16Slow = GetCodePtr();
emit_quantize();
float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32);
BR(ARM64Reg::X2);
}
const u8* storeSingleFloat;
const u8* storeSingleFloatSlow;
{
storeSingleFloat = GetCodePtr();
float_emit.REV32(8, D0, D0);
float_emit.REV32(8, ARM64Reg::D0, ARM64Reg::D0);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.STR(32, IndexType::Unsigned, D0, addr_reg, 0);
RET(X30);
float_emit.STR(32, IndexType::Unsigned, ARM64Reg::D0, addr_reg, 0);
RET(ARM64Reg::X30);
storeSingleFloatSlow = GetCodePtr();
float_emit.UMOV(32, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
float_emit.UMOV(32, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U32);
BR(ARM64Reg::X2);
}
const u8* storeSingleU8; // Used by MKWii
const u8* storeSingleU8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, D0, D0);
float_emit.UQXTN(16, D0, D0);
float_emit.UQXTN(8, D0, D0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(8, ARM64Reg::D0, ARM64Reg::D0);
};
storeSingleU8 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(8, Q0, 0, addr_reg);
RET(X30);
float_emit.ST1(8, ARM64Reg::Q0, 0, addr_reg);
RET(ARM64Reg::X30);
storeSingleU8Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(8, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U8);
BR(X2);
float_emit.UMOV(8, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U8);
BR(ARM64Reg::X2);
}
const u8* storeSingleS8;
const u8* storeSingleS8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, D0, D0);
float_emit.SQXTN(16, D0, D0);
float_emit.SQXTN(8, D0, D0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(8, ARM64Reg::D0, ARM64Reg::D0);
};
storeSingleS8 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(8, Q0, 0, addr_reg);
RET(X30);
float_emit.ST1(8, ARM64Reg::Q0, 0, addr_reg);
RET(ARM64Reg::X30);
storeSingleS8Slow = GetCodePtr();
emit_quantize();
float_emit.SMOV(8, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U8);
BR(X2);
float_emit.SMOV(8, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U8);
BR(ARM64Reg::X2);
}
const u8* storeSingleU16; // Used by MKWii
const u8* storeSingleU16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZU(32, D0, D0);
float_emit.UQXTN(16, D0, D0);
float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
};
storeSingleU16 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.REV16(8, D0, D0);
float_emit.ST1(16, Q0, 0, addr_reg);
RET(X30);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.ST1(16, ARM64Reg::Q0, 0, addr_reg);
RET(ARM64Reg::X30);
storeSingleU16Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
float_emit.UMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16);
BR(ARM64Reg::X2);
}
const u8* storeSingleS16;
const u8* storeSingleS16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
float_emit.FCVTZS(32, D0, D0);
float_emit.SQXTN(16, D0, D0);
float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SQXTN(16, ARM64Reg::D0, ARM64Reg::D0);
};
storeSingleS16 = GetCodePtr();
emit_quantize();
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.REV16(8, D0, D0);
float_emit.ST1(16, Q0, 0, addr_reg);
RET(X30);
float_emit.REV16(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.ST1(16, ARM64Reg::Q0, 0, addr_reg);
RET(ARM64Reg::X30);
storeSingleS16Slow = GetCodePtr();
emit_quantize();
float_emit.SMOV(16, W0, Q0, 0);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
float_emit.SMOV(16, ARM64Reg::W0, ARM64Reg::Q0, 0);
MOVP2R(ARM64Reg::X2, &PowerPC::Write_U16);
BR(ARM64Reg::X2);
}
JitRegister::Register(start, GetCodePtr(), "JIT_QuantizedStore");
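
The store side inverts that recipe: scale by the quantization table, convert to integer with saturation, then byte-swap and store. A scalar sketch under the same assumptions as the load sketch above; std::clamp stands in for the saturating FCVTZU/UQXTN sequence:

#include <algorithm>
#include <cstdint>

// Hypothetical scalar equivalent of storePairedU8 (names assumed):
void QuantizeU8Pair(const float in[2], uint32_t scale,
                    const float* quantize_table, uint8_t* dst)
{
  const float quant = quantize_table[scale * 2];
  for (int i = 0; i < 2; ++i)
    dst[i] = static_cast<uint8_t>(std::clamp(in[i] * quant, 0.0f, 255.0f));
}
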

View File

@ -39,13 +39,13 @@ private:
switch (sbits)
{
case 8:
m_emit->STRB(IndexType::Unsigned, reg, X0, 0);
m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
break;
case 16:
m_emit->STRH(IndexType::Unsigned, reg, X0, 0);
m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
break;
case 32:
m_emit->STR(IndexType::Unsigned, reg, X0, 0);
m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Unknown size %d passed to MMIOWriteCodeGenerator!", sbits);
@ -55,7 +55,7 @@ private:
void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVP2R(X0, ptr);
m_emit->MOVP2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,
@ -67,8 +67,8 @@ private:
}
else
{
m_emit->ANDI2R(W1, m_src_reg, mask, W1);
StoreFromRegister(sbits, W1);
m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
StoreFromRegister(sbits, ARM64Reg::W1);
}
}
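
In other words, when an MMIO register has a write mask, the generated code routes the value through W1 so the mask is applied before the store. A scalar sketch of the 8-bit case (hypothetical helper, not Dolphin code; the 16- and 32-bit cases are analogous):

#include <cstdint>

static void StoreMasked8(uint8_t* mmio, uint32_t value, uint32_t mask)
{
  *mmio = static_cast<uint8_t>(value & mask);  // ANDI2R into W1, then STRB
}
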
@ -77,11 +77,11 @@ private:
ARM64FloatEmitter float_emit(m_emit);
m_emit->ABI_PushRegisters(m_gprs_in_use);
float_emit.ABI_PushRegisters(m_fprs_in_use, X1);
m_emit->MOVI2R(W1, m_address);
m_emit->MOV(W2, m_src_reg);
float_emit.ABI_PushRegisters(m_fprs_in_use, ARM64Reg::X1);
m_emit->MOVI2R(ARM64Reg::W1, m_address);
m_emit->MOV(ARM64Reg::W2, m_src_reg);
m_emit->BLR(m_emit->ABI_SetupLambda(lambda));
float_emit.ABI_PopRegisters(m_fprs_in_use, X1);
float_emit.ABI_PopRegisters(m_fprs_in_use, ARM64Reg::X1);
m_emit->ABI_PopRegisters(m_gprs_in_use);
}
@ -127,18 +127,18 @@ private:
{
case 8:
if (m_sign_extend && !dont_extend)
m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, X0, 0);
m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
else
m_emit->LDRB(IndexType::Unsigned, m_dst_reg, X0, 0);
m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
break;
case 16:
if (m_sign_extend && !dont_extend)
m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, X0, 0);
m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
else
m_emit->LDRH(IndexType::Unsigned, m_dst_reg, X0, 0);
m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
break;
case 32:
m_emit->LDR(IndexType::Unsigned, m_dst_reg, X0, 0);
m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Unknown size %d passed to MMIOReadCodeGenerator!", sbits);
@ -148,7 +148,7 @@ private:
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVP2R(X0, ptr);
m_emit->MOVP2R(ARM64Reg::X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,
@ -161,7 +161,7 @@ private:
else
{
LoadToRegister(sbits, true);
m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, W0);
m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
if (m_sign_extend)
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
}
@ -172,16 +172,16 @@ private:
ARM64FloatEmitter float_emit(m_emit);
m_emit->ABI_PushRegisters(m_gprs_in_use);
float_emit.ABI_PushRegisters(m_fprs_in_use, X1);
m_emit->MOVI2R(W1, m_address);
float_emit.ABI_PushRegisters(m_fprs_in_use, ARM64Reg::X1);
m_emit->MOVI2R(ARM64Reg::W1, m_address);
m_emit->BLR(m_emit->ABI_SetupLambda(lambda));
float_emit.ABI_PopRegisters(m_fprs_in_use, X1);
float_emit.ABI_PopRegisters(m_fprs_in_use, ARM64Reg::X1);
m_emit->ABI_PopRegisters(m_gprs_in_use);
if (m_sign_extend)
m_emit->SBFM(m_dst_reg, W0, 0, sbits - 1);
m_emit->SBFM(m_dst_reg, ARM64Reg::W0, 0, sbits - 1);
else
m_emit->UBFM(m_dst_reg, W0, 0, sbits - 1);
m_emit->UBFM(m_dst_reg, ARM64Reg::W0, 0, sbits - 1);
}
ARM64XEmitter* m_emit;
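
The closing SBFM/UBFM pair above acts as a sign or zero extension of the low sbits of W0 (immr = 0, imms = sbits - 1). For example, with sbits == 16 (an illustration, not Dolphin code):

#include <cstdint>

int32_t sbfm16(uint32_t w0) { return static_cast<int16_t>(w0); }  // SBFM Wd, W0, 0, 15
uint32_t ubfm16(uint32_t w0) { return w0 & 0xFFFFu; }             // UBFM Wd, W0, 0, 15
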

View File

@ -12,18 +12,18 @@
using namespace Arm64Gen;
constexpr ARM64Reg src_reg = X0;
constexpr ARM64Reg dst_reg = X1;
constexpr ARM64Reg count_reg = W2;
constexpr ARM64Reg skipped_reg = W17;
constexpr ARM64Reg scratch1_reg = W16;
constexpr ARM64Reg scratch2_reg = W15;
constexpr ARM64Reg scratch3_reg = W14;
constexpr ARM64Reg saved_count = W12;
constexpr ARM64Reg src_reg = ARM64Reg::X0;
constexpr ARM64Reg dst_reg = ARM64Reg::X1;
constexpr ARM64Reg count_reg = ARM64Reg::W2;
constexpr ARM64Reg skipped_reg = ARM64Reg::W17;
constexpr ARM64Reg scratch1_reg = ARM64Reg::W16;
constexpr ARM64Reg scratch2_reg = ARM64Reg::W15;
constexpr ARM64Reg scratch3_reg = ARM64Reg::W14;
constexpr ARM64Reg saved_count = ARM64Reg::W12;
constexpr ARM64Reg stride_reg = X11;
constexpr ARM64Reg arraybase_reg = X10;
constexpr ARM64Reg scale_reg = X9;
constexpr ARM64Reg stride_reg = ARM64Reg::X11;
constexpr ARM64Reg arraybase_reg = ARM64Reg::X10;
constexpr ARM64Reg scale_reg = ARM64Reg::X9;
alignas(16) static const float scale_factors[] = {
1.0 / (1ULL << 0), 1.0 / (1ULL << 1), 1.0 / (1ULL << 2), 1.0 / (1ULL << 3),
@ -115,8 +115,8 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
int count_in, int count_out, bool dequantize, u8 scaling_exponent,
AttributeFormat* native_format, s32 offset)
{
ARM64Reg coords = count_in == 3 ? Q31 : D31;
ARM64Reg scale = count_in == 3 ? Q30 : D30;
ARM64Reg coords = count_in == 3 ? ARM64Reg::Q31 : ARM64Reg::D31;
ARM64Reg scale = count_in == 3 ? ARM64Reg::Q30 : ARM64Reg::D30;
int elem_size = GetElementSize(format);
int load_bytes = elem_size * count_in;
@ -253,13 +253,13 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
// B
AND(scratch2_reg, scratch3_reg, 32, 4);
ORR(scratch2_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 3));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 3));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 5));
ORR(scratch1_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16));
ORR(scratch1_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16));
// G
UBFM(scratch2_reg, scratch3_reg, 5, 10);
ORR(scratch2_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 6));
ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 8));
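
The shift-and-OR pairs here are the usual bit-replication trick for widening 5- and 6-bit color channels to 8 bits, i.e. roughly:

#include <cstdint>

// As emitted by the ORR/LSL/LSR sequences above (sketch):
uint8_t Expand5To8(uint8_t v) { return (v << 3) | (v >> 2); }  // R/B channels
uint8_t Expand6To8(uint8_t v) { return (v << 2) | (v >> 4); }  // G channel
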
@ -328,19 +328,19 @@ void VertexLoaderARM64::ReadColor(VertexComponentFormat attribute, ColorFormat f
// A
UBFM(scratch2_reg, scratch3_reg, 0, 5);
ORR(scratch2_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 6));
ORR(scratch1_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 24));
ORR(scratch1_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 24));
// B
UBFM(scratch2_reg, scratch3_reg, 6, 11);
ORR(scratch2_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 6));
ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 16));
// G
UBFM(scratch2_reg, scratch3_reg, 12, 17);
ORR(scratch2_reg, WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, ARM64Reg::WSP, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 2));
ORR(scratch2_reg, scratch2_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSR, 6));
ORR(scratch1_reg, scratch1_reg, scratch2_reg, ArithOption(scratch2_reg, ShiftType::LSL, 8));
@ -388,7 +388,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
AlignCode16();
if (IsIndexed(m_VtxDesc.low.Position))
MOV(skipped_reg, WZR);
MOV(skipped_reg, ARM64Reg::WZR);
MOV(saved_count, count_reg);
MOVP2R(stride_reg, g_main_cp_state.array_strides);
@ -544,11 +544,11 @@ void VertexLoaderARM64::GenerateVertexLoader()
m_native_vtx_decl.texcoords[i].integer = false;
LDRB(IndexType::Unsigned, scratch2_reg, src_reg, texmatidx_ofs[i]);
m_float_emit.UCVTF(S31, scratch2_reg);
m_float_emit.UCVTF(ARM64Reg::S31, scratch2_reg);
if (m_VtxDesc.high.TexCoord[i] != VertexComponentFormat::NotPresent)
{
m_float_emit.STR(32, IndexType::Unsigned, D31, dst_reg, m_dst_ofs);
m_float_emit.STR(32, IndexType::Unsigned, ARM64Reg::D31, dst_reg, m_dst_ofs);
m_dst_ofs += sizeof(float);
}
else
@ -557,21 +557,21 @@ void VertexLoaderARM64::GenerateVertexLoader()
if (m_dst_ofs < 256)
{
STUR(SP, dst_reg, m_dst_ofs);
STUR(ARM64Reg::SP, dst_reg, m_dst_ofs);
}
else if (!(m_dst_ofs & 7))
{
// If m_dst_ofs isn't 8-byte aligned, we can't store an 8-byte zero register,
// so store two 4-byte zero registers instead.
// The destination is always 4-byte aligned.
STR(IndexType::Unsigned, WSP, dst_reg, m_dst_ofs);
STR(IndexType::Unsigned, WSP, dst_reg, m_dst_ofs + 4);
STR(IndexType::Unsigned, ARM64Reg::WSP, dst_reg, m_dst_ofs);
STR(IndexType::Unsigned, ARM64Reg::WSP, dst_reg, m_dst_ofs + 4);
}
else
{
STR(IndexType::Unsigned, SP, dst_reg, m_dst_ofs);
STR(IndexType::Unsigned, ARM64Reg::SP, dst_reg, m_dst_ofs);
}
m_float_emit.STR(32, IndexType::Unsigned, D31, dst_reg, m_dst_ofs + 8);
m_float_emit.STR(32, IndexType::Unsigned, ARM64Reg::D31, dst_reg, m_dst_ofs + 8);
m_dst_ofs += sizeof(float) * 3;
}
@ -588,8 +588,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
if (IsIndexed(m_VtxDesc.low.Position))
{
SUB(W0, saved_count, skipped_reg);
RET(X30);
SUB(ARM64Reg::W0, saved_count, skipped_reg);
RET(ARM64Reg::X30);
SetJumpTarget(m_skip_vertex);
ADD(skipped_reg, skipped_reg, 1);
@ -597,8 +597,8 @@ void VertexLoaderARM64::GenerateVertexLoader()
}
else
{
MOV(W0, saved_count);
RET(X30);
MOV(ARM64Reg::W0, saved_count);
RET(ARM64Reg::X30);
}
FlushIcache();

View File

@ -26,7 +26,7 @@ public:
ResetCodePtr();
const u8* fn = GetCodePtr();
MOVI2R(W0, value);
MOVI2R(ARM64Reg::W0, value);
RET();
FlushIcacheSection(const_cast<u8*>(fn), const_cast<u8*>(GetCodePtr()));
@ -40,7 +40,7 @@ public:
ResetCodePtr();
const u8* fn = GetCodePtr();
MOVI2R(X0, value);
MOVI2R(ARM64Reg::X0, value);
RET();
FlushIcacheSection(const_cast<u8*>(fn), const_cast<u8*>(GetCodePtr()));
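
Presumably these helpers are exercised by casting the emitted stub to a function pointer and checking that the constant round-trips through MOVI2R; a hedged usage sketch, reusing fn and value from above:

auto* f = reinterpret_cast<uint64_t (*)()>(const_cast<u8*>(fn));  // u32 (*)() for the W0 case
// expect f() == value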