Merge pull request #3694 from degasus/arm

JitArm64: Small improvements.
This commit is contained in:
Ryan Houdek 2016-03-04 18:01:25 -05:00
commit 8f75594de7
16 changed files with 305 additions and 259 deletions

View File

@ -1389,6 +1389,10 @@ void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
UMADDL(Rd, Rn, Rm, SP);
}
void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
{
EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);

View File

@ -517,6 +517,7 @@ public:
void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

View File

@ -86,9 +86,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
// also flush the program counter
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, js.compilerPC);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(pc));
ADD(WA, WA, 4);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
gpr.Unlock(WA);
}
@ -102,14 +102,14 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
if (js.isLastInstruction)
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
WriteExceptionExit(WA);
}
else
{
// only exit if ppcstate.npc was changed
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
ARM64Reg WB = gpr.GetReg();
MOVI2R(WB, js.compilerPC + 4);
CMP(WB, WA);
@ -132,8 +132,8 @@ void JitArm64::HLEFunction(UGeckoInstruction inst)
BLR(X30);
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
WriteExitDestInR(WA);
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
WriteExit(WA);
}
void JitArm64::DoNothing(UGeckoInstruction inst)
@ -161,18 +161,18 @@ void JitArm64::Cleanup()
void JitArm64::DoDownCount()
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 4096) // We can enlarge this if we used rotations
{
SUBS(WA, WA, js.downcountAmount);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
}
else
{
ARM64Reg WB = gpr.GetReg();
MOVI2R(WB, js.downcountAmount);
SUBS(WA, WA, WB);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
gpr.Unlock(WB);
}
gpr.Unlock(WA);
@ -193,93 +193,87 @@ void JitArm64::WriteExit(u32 destination)
linkData.exitAddress = destination;
linkData.exitPtrs = GetWritableCodePtr();
linkData.linkStatus = false;
b->linkData.push_back(linkData);
// the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(WA, destination);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
MOVI2R(XA, (u64)asm_routines.dispatcher);
BR(XA);
gpr.Unlock(WA);
b->linkData.push_back(linkData);
}
void JitArm64::WriteExceptionExit(ARM64Reg dest)
{
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
gpr.Unlock(dest);
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions);
BLR(EncodeRegTo64(dest));
LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher);
BR(EncodeRegTo64(dest));
MOVI2R(X30, (u64)asm_routines.dispatcher);
MOVI2R(DISPATCHER_PC, destination);
BR(X30);
}
void JitArm64::WriteExceptionExit()
void JitArm64::WriteExit(ARM64Reg Reg)
{
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
MOVI2R(XA, (u64)&PowerPC::CheckExceptions);
BLR(XA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
MOVI2R(XA, (u64)asm_routines.dispatcher);
BR(XA);
gpr.Unlock(WA);
}
void JitArm64::WriteExternalExceptionExit(ARM64Reg dest)
{
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
gpr.Unlock(dest);
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
BLR(EncodeRegTo64(dest));
LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc));
MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher);
BR(EncodeRegTo64(dest));
}
void JitArm64::WriteExitDestInR(ARM64Reg Reg)
{
STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc));
if (Reg != DISPATCHER_PC)
MOV(DISPATCHER_PC, Reg);
gpr.Unlock(Reg);
Cleanup();
DoDownCount();
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(EncodeRegTo64(Reg), (u64)asm_routines.dispatcher);
BR(EncodeRegTo64(Reg));
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
}
void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
{
Cleanup();
DoDownCount();
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
MOVI2R(DISPATCHER_PC, destination);
FixupBranch no_exceptions = CBZ(W30);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
if (only_external)
MOVI2R(X30, (u64)&PowerPC::CheckExternalExceptions);
else
MOVI2R(X30, (u64)&PowerPC::CheckExceptions);
BLR(X30);
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
SetJumpTarget(no_exceptions);
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
}
void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
{
Cleanup();
DoDownCount();
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_exceptions = CBZ(WA);
gpr.Unlock(WA);
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
if (only_external)
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
else
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions);
BLR(EncodeRegTo64(dest));
LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
SetJumpTarget(no_exceptions);
if (dest != DISPATCHER_PC)
MOV(DISPATCHER_PC, dest);
gpr.Unlock(dest);
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
}
void JitArm64::DumpCode(const u8* start, const u8* end)
@ -420,8 +414,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
FixupBranch bail = B(CC_PL);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(WA, js.blockStart);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
MOVI2R(DISPATCHER_PC, js.blockStart);
MOVI2R(XA, (u64)asm_routines.doTiming);
BR(XA);
gpr.Unlock(WA);
@ -449,13 +442,13 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
int gqr = *code_block.m_gqr_used.begin();
if (!code_block.m_gqr_modified[gqr] && !GQR(gqr))
{
LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4);
LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0]) + gqr * 4);
FixupBranch no_fail = CBZ(W0);
FixupBranch fail = B();
SwitchToFarCode();
SetJumpTarget(fail);
MOVI2R(W0, js.blockStart);
STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
MOVI2R(DISPATCHER_PC, js.blockStart);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck);
BLR(X1);
@ -473,10 +466,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
gpr.Start(js.gpa);
fpr.Start(js.fpa);
// Setup memory base register
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
MOVI2R(X28, (u64)base);
if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
@ -517,9 +506,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
ABI_PopRegisters(regs_in_use);
// Inline exception check
LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(Exceptions));
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
LDR(INDEX_UNSIGNED, W30, X29, PPCSTATE_OFF(msr));
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(W30, 11, done_here);
MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, W30, X30, 0);
@ -528,8 +517,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOVI2R(W30, ops[i].address);
WriteExternalExceptionExit(W30);
WriteExceptionExit(js.compilerPC, true);
SwitchToNearCode();
SetJumpTarget(exit);
gpr.Unlock(W30);
@ -544,24 +532,24 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch NoExtException = TBZ(WA, 3); // EXCEPTION_EXTERNAL_INT
FixupBranch Exception = B();
SwitchToFarCode();
const u8* done_here = GetCodePtr();
FixupBranch exit = B();
SetJumpTarget(Exception);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 11, done_here);
MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, WA, XA, 0);
TST(WA, 23, 2);
B(CC_EQ, done_here);
gpr.Unlock(WA);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
MOVI2R(WA, ops[i].address);
WriteExternalExceptionExit(WA);
WriteExceptionExit(js.compilerPC, true);
SwitchToNearCode();
SetJumpTarget(NoExtException);
SetJumpTarget(exit);
@ -573,7 +561,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
{
//This instruction uses FPU - needs to add FP exception bailout
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit
FixupBranch far = B();
@ -583,12 +571,13 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
ORR(WA, WA, 26, 0); // EXCEPTION_FPU_UNAVAILABLE
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
MOVI2R(WA, js.compilerPC);
WriteExceptionExit(WA);
gpr.Unlock(WA);
WriteExceptionExit(js.compilerPC);
SwitchToNearCode();

View File

@ -17,14 +17,6 @@
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
// Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock
{
public:
@ -95,6 +87,8 @@ public:
void rlwnmx(UGeckoInstruction inst);
void srawix(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst);
void mulhwx(UGeckoInstruction inst);
void mulhwux(UGeckoInstruction inst);
void addic(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
@ -206,6 +200,7 @@ private:
{
nearcode = GetWritableCodePtr();
SetCodePtrUnsafe(farcode.GetWritableCodePtr());
AlignCode16();
}
void SwitchToNearCode()
@ -237,10 +232,9 @@ private:
// Exits
void WriteExit(u32 destination);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest);
void WriteExceptionExit();
void WriteExternalExceptionExit(ARM64Reg dest);
void WriteExitDestInR(Arm64Gen::ARM64Reg dest);
void WriteExit(Arm64Gen::ARM64Reg dest);
void WriteExceptionExit(u32 destination, bool only_external = false);
void WriteExceptionExit(Arm64Gen::ARM64Reg dest, bool only_external = false);
FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);

View File

@ -29,9 +29,8 @@ void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address)
{
// must fit within the code generated in JitArm64::WriteExit
ARM64XEmitter emit((u8 *)location);
emit.MOVI2R(W0, address);
emit.MOVI2R(X30, (u64)jit->GetAsmRoutines()->dispatcher);
emit.STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
emit.MOVI2R(DISPATCHER_PC, address);
emit.BR(X30);
emit.FlushIcache();
}

View File

@ -60,28 +60,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{
m_float_emit.FCVT(32, 64, D0, RS);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, D0, X28, addr);
m_float_emit.STR(32, D0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.REV32(8, D0, RS);
m_float_emit.STR(32, D0, X28, addr);
m_float_emit.STR(32, D0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
{
m_float_emit.FCVTN(32, D0, RS);
m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(64, Q0, X28, addr);
m_float_emit.STR(64, Q0, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.REV32(8, D0, RS);
m_float_emit.STR(64, Q0, X28, addr);
m_float_emit.STR(64, Q0, MEM_REG, addr);
}
else
{
m_float_emit.REV64(8, Q0, RS);
m_float_emit.STR(64, Q0, X28, addr);
m_float_emit.STR(64, Q0, MEM_REG, addr);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
@ -89,12 +89,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
m_float_emit.LDR(32, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
else
{
m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr);
m_float_emit.LDR(64, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
}
@ -107,27 +107,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
REV16(temp, RS);
if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(temp, X28, addr);
STR(temp, MEM_REG, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
STRH(temp, X28, addr);
STRH(temp, MEM_REG, addr);
else
STRB(RS, X28, addr);
STRB(RS, MEM_REG, addr);
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
// This literally only stores 32bytes of zeros to the target address
ADD(addr, addr, X28);
ADD(addr, addr, MEM_REG);
STP(INDEX_SIGNED, ZR, ZR, addr, 0);
STP(INDEX_SIGNED, ZR, ZR, addr, 16);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
LDR(RS, X28, addr);
LDR(RS, MEM_REG, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
LDRH(RS, X28, addr);
LDRH(RS, MEM_REG, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(RS, X28, addr);
LDRB(RS, MEM_REG, addr);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{

View File

@ -26,15 +26,13 @@ void JitArm64::sc(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
ORR(WA, WA, 31, 0); // Same as WA | EXCEPTION_SYSCALL
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
MOVI2R(WA, js.compilerPC + 4);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));
gpr.Unlock(WA);
// WA is unlocked in this function
WriteExceptionExit(WA);
WriteExceptionExit(js.compilerPC + 4);
}
void JitArm64::rfi(UGeckoInstruction inst)
@ -60,18 +58,18 @@ void JitArm64::rfi(UGeckoInstruction inst)
MOVI2R(WA, (~mask) & clearMSR13);
MOVI2R(WB, mask & clearMSR13);
LDR(INDEX_UNSIGNED, WC, X29, PPCSTATE_OFF(msr));
LDR(INDEX_UNSIGNED, WC, PPC_REG, PPCSTATE_OFF(msr));
AND(WC, WC, WB, ArithOption(WC, ST_LSL, 0)); // rD = Masked MSR
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here
AND(WA, WA, WB, ArithOption(WA, ST_LSL, 0)); // rB contains masked SRR1 here
ORR(WA, WA, WC, ArithOption(WA, ST_LSL, 0)); // rB = Masked MSR OR masked SRR1
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); // STR rB in to rA
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR0]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR0]));
gpr.Unlock(WB, WC);
// WA is unlocked in this function
@ -94,10 +92,9 @@ void JitArm64::bx(UGeckoInstruction inst)
if (inst.LK)
{
u32 Jumpto = js.compilerPC + 4;
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, Jumpto);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
MOVI2R(WA, js.compilerPC + 4);
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
gpr.Unlock(WA);
}
@ -109,8 +106,9 @@ void JitArm64::bx(UGeckoInstruction inst)
MOVI2R(XA, (u64)&CoreTiming::Idle);
BLR(XA);
MOVI2R(WA, js.compilerPC);
WriteExceptionExit(WA);
gpr.Unlock(WA);
WriteExceptionExit(js.compilerPC);
}
WriteExit(destination);
@ -125,9 +123,9 @@ void JitArm64::bcx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR]));
SUBS(WA, WA, 1);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR]));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR]));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = B(CC_NEQ);
@ -149,9 +147,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
if (inst.LK)
{
u32 Jumpto = js.compilerPC + 4;
MOVI2R(WA, Jumpto);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
MOVI2R(WA, js.compilerPC + 4);
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
}
gpr.Unlock(WA);
@ -203,17 +200,16 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
{
ARM64Reg WB = gpr.GetReg();
u32 Jumpto = js.compilerPC + 4;
MOVI2R(WB, Jumpto);
STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR]));
MOVI2R(WB, js.compilerPC + 4);
STR(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
gpr.Unlock(WB);
}
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR]));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
WriteExitDestInR(WA);
WriteExit(WA);
}
void JitArm64::bclrx(UGeckoInstruction inst)
@ -225,9 +221,9 @@ void JitArm64::bclrx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR]));
SUBS(WA, WA, 1);
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR]));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_CTR]));
if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = B(CC_NEQ);
@ -246,22 +242,21 @@ void JitArm64::bclrx(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(far);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
if (inst.LK)
{
ARM64Reg WB = gpr.GetReg();
u32 Jumpto = js.compilerPC + 4;
MOVI2R(WB, Jumpto);
STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR]));
MOVI2R(WB, js.compilerPC + 4);
STR(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
gpr.Unlock(WB);
}
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
WriteExitDestInR(WA);
WriteExit(WA);
SwitchToNearCode();

View File

@ -264,7 +264,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
}
SetJumpTarget(continue1);
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA);
}

View File

@ -26,12 +26,12 @@ void JitArm64::ComputeRC(ARM64Reg reg, int crf, bool needs_sext)
SXTW(XA, reg);
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[crf]));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
}
else
{
STR(INDEX_UNSIGNED, EncodeRegTo64(reg), X29, PPCSTATE_OFF(cr_val[crf]));
STR(INDEX_UNSIGNED, EncodeRegTo64(reg), PPC_REG, PPCSTATE_OFF(cr_val[crf]));
}
}
@ -44,7 +44,7 @@ void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext)
if (imm & 0x80000000 && needs_sext)
SXTW(XA, WA);
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[crf]));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
}
@ -57,12 +57,12 @@ void JitArm64::ComputeCarry(bool Carry)
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, 1);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
return;
}
STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca));
}
void JitArm64::ComputeCarry()
@ -72,7 +72,7 @@ void JitArm64::ComputeCarry()
ARM64Reg WA = gpr.GetReg();
CSINC(WA, WSP, WSP, CC_CC);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
}
@ -447,7 +447,7 @@ void JitArm64::cmp(UGeckoInstruction inst)
SXTW(XB, RB);
SUB(XA, XA, XB);
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA, WB);
}
@ -474,7 +474,7 @@ void JitArm64::cmpl(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA);
}
@ -540,7 +540,7 @@ void JitArm64::cmpli(UGeckoInstruction inst)
SUB(XA, EncodeRegTo64(gpr.R(a)), XA);
}
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA);
}
@ -641,7 +641,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
ANDS(WSP, WA, RA, ArithOption(RA, ST_LSL, 0));
CSINC(WA, WSP, WSP, CC_EQ);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
}
else
@ -650,7 +650,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
ARM64Reg RA = gpr.R(a);
ARM64Reg RS = gpr.R(s);
MOV(RA, RS);
STRB(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca));
}
}
@ -704,7 +704,6 @@ void JitArm64::mulli(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
FALLBACK_IF(inst.OE);
int a = inst.RA, d = inst.RD;
@ -747,6 +746,56 @@ void JitArm64::mullwx(UGeckoInstruction inst)
}
}
void JitArm64::mulhwx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, (u32)((u64)(((s64)i * (s64)j) ) >> 32));
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
gpr.BindToRegister(d, d == a || d == b);
SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::mulhwux(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, (u32)( ( (u64)i * (u64)j) >> 32) );
if (inst.Rc)
ComputeRC(gpr.GetImm(d), 0);
}
else
{
gpr.BindToRegister(d, d == a || d == b);
UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
}
void JitArm64::addzex(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -759,14 +808,14 @@ void JitArm64::addzex(UGeckoInstruction inst)
{
gpr.BindToRegister(d, true);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDS(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}
else
{
gpr.BindToRegister(d, false);
LDRB(INDEX_UNSIGNED, gpr.R(d), X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, gpr.R(d), PPC_REG, PPCSTATE_OFF(xer_ca));
ADDS(gpr.R(d), gpr.R(a), gpr.R(d));
}
@ -814,7 +863,7 @@ void JitArm64::subfex(UGeckoInstruction inst)
gpr.BindToRegister(d, false);
MOVI2R(gpr.R(d), ~i + j);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADD(gpr.R(d), gpr.R(d), WA);
gpr.Unlock(WA);
@ -840,7 +889,7 @@ void JitArm64::subfex(UGeckoInstruction inst)
gpr.BindToRegister(d, d == a || d == b);
// upload the carry state
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CMP(WA, 1);
// d = ~a + b + carry;
@ -935,7 +984,7 @@ void JitArm64::addex(UGeckoInstruction inst)
gpr.BindToRegister(d, false);
MOVI2R(gpr.R(d), i + j);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADD(gpr.R(d), gpr.R(d), WA);
gpr.Unlock(WA);
@ -961,7 +1010,7 @@ void JitArm64::addex(UGeckoInstruction inst)
// upload the carry state
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CMP(WA, 1);
gpr.Unlock(WA);

View File

@ -463,19 +463,15 @@ void JitArm64::lXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(XA, (u64)&CoreTiming::Idle);
BLR(XA);
gpr.Unlock(WA);
WriteExceptionExit();
WriteExceptionExit(js.compilerPC);
SwitchToNearCode();
SetJumpTarget(noIdle);
//js.compilerPC += 8;
return;
}
}
@ -597,7 +593,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
}
ADD(XA, XA, X28);
ADD(XA, XA, MEM_REG);
for (int i = inst.RD; i < 32; i++)
{

View File

@ -45,9 +45,9 @@ void JitArm64::psq_l(UGeckoInstruction inst)
if (inst.RA || update) // Always uses the register on update
{
if (offset >= 0)
ADD(addr_reg, gpr.R(inst.RA), offset);
ADD(addr_reg, arm_addr, offset);
else
SUB(addr_reg, gpr.R(inst.RA), std::abs(offset));
SUB(addr_reg, arm_addr, std::abs(offset));
}
else
{
@ -65,18 +65,18 @@ void JitArm64::psq_l(UGeckoInstruction inst)
VS = fpr.RW(inst.RS, REG_REG_SINGLE);
if (!inst.W)
{
ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28);
ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), MEM_REG);
m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg));
}
else
{
m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28);
m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), MEM_REG);
}
m_float_emit.REV32(8, EncodeRegToDouble(VS), EncodeRegToDouble(VS));
}
else
{
LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale
@ -179,7 +179,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
m_float_emit.FCVTN(32, D0, VS);
}
LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
LDR(INDEX_UNSIGNED, scale_reg, PPC_REG, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
UBFM(type_reg, scale_reg, 0, 2); // Type
UBFM(scale_reg, scale_reg, 8, 13); // Scale

View File

@ -101,7 +101,7 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
{
ARM64Reg host_reg = reg.GetReg();
if (reg.IsDirty())
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
if (!maintain_state)
{
@ -113,14 +113,14 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
{
if (!reg.GetImm())
{
m_emit->STR(INDEX_UNSIGNED, WSP, X29, PPCSTATE_OFF(gpr[preg]));
m_emit->STR(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(gpr[preg]));
}
else
{
ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, reg.GetImm());
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
UnlockRegister(host_reg);
}
@ -147,7 +147,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
ARM64Reg RX1 = R(i);
ARM64Reg RX2 = R(i + 1);
m_emit->STP(INDEX_SIGNED, RX1, RX2, X29, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32));
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, PPCSTATE_OFF(gpr[0]) + i * sizeof(u32));
if (!maintain_state)
{
UnlockRegister(RX1);
@ -210,7 +210,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
ARM64Reg host_reg = GetReg();
reg.Load(host_reg);
reg.SetDirty(false);
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
return host_reg;
}
break;
@ -242,7 +242,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
ARM64Reg host_reg = GetReg();
reg.Load(host_reg);
if (do_load)
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(gpr[preg]));
}
}
@ -345,7 +345,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
{
// Load the high 64bits from the file and insert them in to the high 64bits of the host register
ARM64Reg tmp_reg = GetReg();
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
UnlockRegister(tmp_reg);
@ -399,7 +399,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
reg.Load(host_reg, REG_LOWER_PAIR);
}
reg.SetDirty(false);
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
return host_reg;
}
default:
@ -446,7 +446,7 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
case REG_REG:
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
break;
case REG_DUP_SINGLE:
flush_reg = GetReg();
@ -454,7 +454,7 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
// fall through
case REG_DUP:
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
break;
default:
// All other types doesn't store anything in PSR1.
@ -543,7 +543,7 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
store_size = 64;
if (dirty)
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state)
{
@ -557,9 +557,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{
// If the paired registers were at the start of ppcState we could do an STP here.
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
//m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
//m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg][1]));
}
if (!maintain_state)

View File

@ -9,12 +9,24 @@
#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCAnalyst.h"
// Dedicated host registers
// X29 = ppcState pointer
using namespace Arm64Gen;
// Dedicated host registers
static const ARM64Reg MEM_REG = X28; // memory base register
static const ARM64Reg PPC_REG = X29; // ppcState pointer
static const ARM64Reg DISPATCHER_PC = W26; // register for PC when calling the dispatcher
#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
// Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
static_assert(PPCSTATE_OFF(xer_ca) < 4096, "STRB can't store xer_ca!");
static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
enum RegType
{
REG_NOTLOADED = 0,

View File

@ -23,20 +23,20 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field]));
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61);
break;
case CR_EQ_BIT: // check bits 31-0 == 0
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(cr_val[field]));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? CBZ(WA) : CBNZ(WA);
break;
case CR_GT_BIT: // check val > 0
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field]));
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
CMP(XA, SP);
branch = B(jump_if_set ? CC_GT : CC_LE);
break;
case CR_LT_BIT: // check bit 62 set
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field]));
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
break;
default:
@ -53,7 +53,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr));
STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));
gpr.Flush(FlushMode::FLUSH_ALL);
fpr.Flush(FlushMode::FLUSH_ALL);
@ -67,7 +67,7 @@ void JitArm64::mfmsr(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(msr));
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF(msr));
}
void JitArm64::mcrf(UGeckoInstruction inst)
@ -79,8 +79,8 @@ void JitArm64::mcrf(UGeckoInstruction inst)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[inst.CRFS]));
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[inst.CRFD]));
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFS]));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));
gpr.Unlock(WA);
}
}
@ -91,7 +91,7 @@ void JitArm64::mfsr(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(sr[inst.SR]));
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF(sr[inst.SR]));
}
void JitArm64::mtsr(UGeckoInstruction inst)
@ -100,7 +100,7 @@ void JitArm64::mtsr(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(sr[inst.SR]));
STR(INDEX_UNSIGNED, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(sr[inst.SR]));
}
void JitArm64::mfsrin(UGeckoInstruction inst)
@ -116,7 +116,7 @@ void JitArm64::mfsrin(UGeckoInstruction inst)
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
ADD(index64, PPC_REG, index64, ArithOption(index64, ST_LSL, 2));
LDR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
@ -135,7 +135,7 @@ void JitArm64::mtsrin(UGeckoInstruction inst)
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
ADD(index64, PPC_REG, index64, ArithOption(index64, ST_LSL, 2));
STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
@ -193,14 +193,12 @@ void JitArm64::twx(UGeckoInstruction inst)
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
ORR(WA, WA, 24, 0); // Same as WA | EXCEPTION_PROGRAM
STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions));
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
gpr.Unlock(WA);
MOVI2R(WA, js.compilerPC);
// WA is unlocked in this function
WriteExceptionExit(WA);
WriteExceptionExit(js.compilerPC);
SwitchToNearCode();
@ -252,7 +250,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue);
LDR(INDEX_UNSIGNED, XB, XB, 0);
ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(spr[SPR_TL]));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL]));
if (MergeAllowedNextInstructions(1))
{
@ -295,10 +293,10 @@ void JitArm64::mfspr(UGeckoInstruction inst)
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
ARM64Reg WA = gpr.GetReg();
LDRH(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(xer_stringctrl));
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
LDRH(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(xer_stringctrl));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_CA_SHIFT));
LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov));
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov));
ORR(RD, RD, WA, ArithOption(WA, ST_LSL, XER_OV_SHIFT));
gpr.Unlock(WA);
}
@ -309,7 +307,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
default:
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
LDR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
LDR(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(spr) + iIndex * 4);
break;
}
}
@ -359,11 +357,11 @@ void JitArm64::mtspr(UGeckoInstruction inst)
ARM64Reg RD = gpr.R(inst.RD);
ARM64Reg WA = gpr.GetReg();
AND(WA, RD, 24, 30);
STRH(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_stringctrl));
STRH(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_stringctrl));
UBFM(WA, RD, XER_CA_SHIFT, XER_CA_SHIFT + 1);
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
UBFM(WA, RD, XER_OV_SHIFT, 31); // Same as WA = RD >> XER_OV_SHIFT
STRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_so_ov));
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(WA);
}
break;
@ -373,7 +371,7 @@ void JitArm64::mtspr(UGeckoInstruction inst)
// OK, this is easy.
ARM64Reg RD = gpr.R(inst.RD);
STR(INDEX_UNSIGNED, RD, X29, PPCSTATE_OFF(spr) + iIndex * 4);
STR(INDEX_UNSIGNED, RD, PPC_REG, PPCSTATE_OFF(spr) + iIndex * 4);
}
void JitArm64::crXXX(UGeckoInstruction inst)
@ -390,7 +388,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
switch (bit)
{
case CR_SO_BIT:
@ -409,7 +407,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62)
break;
}
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
gpr.Unlock(WA);
return;
}
@ -423,7 +421,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
if (bit != CR_GT_BIT)
{
@ -456,7 +454,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ORR(XA, XA, 32, 0, true); // XA | 1<<32
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * field);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
gpr.Unlock(WA);
return;
}
@ -481,7 +479,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WC = gpr.GetReg();
ARM64Reg XC = EncodeRegTo64(WC);
LDR(INDEX_UNSIGNED, XC, X29, PPCSTATE_OFF(cr_val) + 8 * field);
LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
@ -538,7 +536,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
LDR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field);
LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
// Gross but necessary; if the input is totally zero and we set SO or LT,
// or even just add the (1<<32), GT will suddenly end up set without us
@ -576,7 +574,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
}
ORR(XA, XA, 32, 0, true); // XA | 1<<32
STR(INDEX_UNSIGNED, XB, X29, PPCSTATE_OFF(cr_val) + 8 * field);
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field);
gpr.Unlock(WA);
gpr.Unlock(WB);
@ -627,7 +625,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
}
LDR(XA, XB, ArithOption(XA, true));
STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val) + 8 * i);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i);
}
}
gpr.Unlock(WA, WB);

View File

@ -184,8 +184,8 @@ static GekkoOPTemplate table31[] =
{1003, &JitArm64::FallBackToInterpreter}, // divwox
{459, &JitArm64::divwux}, // divwux
{971, &JitArm64::divwux}, // divwuox
{75, &JitArm64::FallBackToInterpreter}, // mulhwx
{11, &JitArm64::FallBackToInterpreter}, // mulhwux
{75, &JitArm64::mulhwx}, // mulhwx
{11, &JitArm64::mulhwux}, // mulhwux
{235, &JitArm64::mullwx}, // mullwx
{747, &JitArm64::mullwx}, // mullwox
{104, &JitArm64::negx}, // negx

View File

@ -25,7 +25,12 @@ void JitArm64AsmRoutineManager::Generate()
ABI_PushRegisters(regs_to_save);
MOVI2R(X29, (u64)&PowerPC::ppcState);
MOVI2R(PPC_REG, (u64)&PowerPC::ppcState);
MOVI2R(MEM_REG, (u64)Memory::logical_base);
// Load the current PC into DISPATCHER_PC
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
FixupBranch to_dispatcher = B();
// If we align the dispatcher to a page then we can load its location with one ADRP instruction
@ -44,11 +49,10 @@ void JitArm64AsmRoutineManager::Generate()
// This block of code gets the address of the compiled block of code
// It runs though to the compiling portion if it isn't found
LDR(INDEX_UNSIGNED, W28, X29, PPCSTATE_OFF(pc)); // Load the current PC into W28
BFM(W28, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK
BFM(DISPATCHER_PC, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK
MOVI2R(X27, (u64)jit->GetBlockCache()->iCache.data());
LDR(W27, X27, X28);
LDR(W27, X27, EncodeRegTo64(DISPATCHER_PC));
FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit
// Success, it is our Jitblock.
@ -60,6 +64,8 @@ void JitArm64AsmRoutineManager::Generate()
SetJumpTarget(JitBlock);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(X30, (u64)&Jit);
BLR(X30);
@ -71,12 +77,14 @@ void JitArm64AsmRoutineManager::Generate()
BLR(X30);
// Does exception checking
LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc));
LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(Exceptions));
FixupBranch no_exceptions = CBZ(W0);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
MOVI2R(X30, (u64)&PowerPC::CheckExceptions);
BLR(X30);
LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc));
STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
SetJumpTarget(no_exceptions);
// Check the state pointer to see if we are exiting
// Gets checked on every exception check
@ -89,6 +97,7 @@ void JitArm64AsmRoutineManager::Generate()
B(dispatcher);
SetJumpTarget(Exit);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
// Let the waiting thread know we are done leaving
MOVI2R(X0, (u64)&PowerPC::FinishStateMove);
@ -583,7 +592,7 @@ void JitArm64AsmRoutineManager::GenMfcr()
const u8* start = GetCodePtr();
for (int i = 0; i < 8; i++)
{
LDR(INDEX_UNSIGNED, X1, X29, PPCSTATE_OFF(cr_val) + 8 * i);
LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i);
// SO
if (i == 0)