JitArm64: Use CR cache

This commit is contained in:
MerryMage 2017-04-26 19:37:37 +00:00
parent a9fbf69cad
commit d4e2529f13
6 changed files with 90 additions and 146 deletions

View File

@ -215,7 +215,6 @@ private:
// AsmRoutines
void GenerateAsm();
void GenerateCommonAsm();
void GenMfcr();
// Profiling
void BeginTimeProfile(JitBlock* b);

View File

@ -268,8 +268,8 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
ARM64Reg VA = reg_encoder(fpr.R(a, type));
ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
gpr.BindCRToRegister(crf, false);
ARM64Reg XA = gpr.CR(crf);
FixupBranch pNaN, pLesser, pGreater;
FixupBranch continue1, continue2, continue3;
@ -312,10 +312,6 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
SetJumpTarget(continue3);
}
SetJumpTarget(continue1);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
}
void JitArm64::fctiwzx(UGeckoInstruction inst)

View File

@ -17,33 +17,23 @@ using namespace Arm64Gen;
void JitArm64::ComputeRC(ARM64Reg reg, int crf, bool needs_sext)
{
gpr.BindCRToRegister(crf, false);
if (needs_sext)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
SXTW(XA, reg);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
SXTW(gpr.CR(crf), reg);
}
else
{
STR(INDEX_UNSIGNED, EncodeRegTo64(reg), PPC_REG, PPCSTATE_OFF(cr_val[crf]));
MOV(gpr.CR(crf), EncodeRegTo64(reg));
}
}
void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(XA, imm);
gpr.BindCRToRegister(crf, false);
MOVI2R(gpr.CR(crf), imm);
if (imm & 0x80000000 && needs_sext)
SXTW(XA, WA);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
SXTW(gpr.CR(crf), DecodeReg(gpr.CR(crf)));
}
void JitArm64::ComputeCarry(bool Carry)
@ -425,19 +415,18 @@ void JitArm64::cmp(UGeckoInstruction inst)
return;
}
gpr.BindCRToRegister(crf, false);
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XB = EncodeRegTo64(WB);
ARM64Reg RA = gpr.R(a);
ARM64Reg RB = gpr.R(b);
ARM64Reg CR = gpr.CR(crf);
SXTW(XA, RA);
SXTW(XB, RB);
SXTW(CR, RB);
SUB(CR, XA, CR);
SUB(XA, XA, XB);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA, WB);
gpr.Unlock(WA);
}
void JitArm64::cmpl(UGeckoInstruction inst)
@ -459,11 +448,8 @@ void JitArm64::cmpl(UGeckoInstruction inst)
return;
}
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
gpr.BindCRToRegister(crf, false);
SUB(gpr.CR(crf), EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
}
void JitArm64::cmpi(UGeckoInstruction inst)
@ -507,13 +493,10 @@ void JitArm64::cmpli(UGeckoInstruction inst)
return;
}
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
gpr.BindCRToRegister(crf, false);
ARM64Reg XA = gpr.CR(crf);
SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA);
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
gpr.Unlock(WA);
}
void JitArm64::rlwinmx(UGeckoInstruction inst)

View File

@ -195,13 +195,13 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
reg2.GetType() == REG_REG)
{
size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
ARM64Reg RX1 = R(i);
ARM64Reg RX2 = R(i + 1);
ARM64Reg RX1 = R(GetGuestByIndex(i));
ARM64Reg RX2 = R(GetGuestByIndex(i + 1));
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, ppc_offset);
if (!maintain_state)
{
UnlockRegister(RX1);
UnlockRegister(RX2);
UnlockRegister(DecodeReg(RX1));
UnlockRegister(DecodeReg(RX2));
reg1.Flush();
reg2.Flush();
}

View File

@ -14,35 +14,23 @@
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XA = gpr.CR(field);
ARM64Reg WA = DecodeReg(XA);
FixupBranch branch;
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61);
break;
return jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61);
case CR_EQ_BIT: // check bits 31-0 == 0
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? CBZ(WA) : CBNZ(WA);
break;
return jump_if_set ? CBZ(WA) : CBNZ(WA);
case CR_GT_BIT: // check val > 0
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
CMP(XA, SP);
branch = B(jump_if_set ? CC_GT : CC_LE);
break;
return B(jump_if_set ? CC_GT : CC_LE);
case CR_LT_BIT: // check bit 62 set
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
break;
return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
default:
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
}
gpr.Unlock(WA);
return branch;
}
void JitArm64::mtmsr(UGeckoInstruction inst)
@ -79,11 +67,8 @@ void JitArm64::mcrf(UGeckoInstruction inst)
if (inst.CRFS != inst.CRFD)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFS]));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));
gpr.Unlock(WA);
gpr.BindCRToRegister(inst.CRFD, false);
MOV(gpr.CR(inst.CRFD), gpr.CR(inst.CRFS));
}
}
@ -92,10 +77,11 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindCRToRegister(inst.CRFD, false);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg WB = gpr.GetReg();
ARM64Reg XB = EncodeRegTo64(WB);
ARM64Reg XB = gpr.CR(inst.CRFD);
ARM64Reg WB = DecodeReg(XB);
// Copy XER[0-3] into CR[inst.CRFD]
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
@ -108,13 +94,12 @@ void JitArm64::mcrxr(UGeckoInstruction inst)
MOVP2R(XB, m_crTable.data());
LDR(XB, XB, XA);
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));
// Clear XER[0-3]
STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(WA, WB);
gpr.Unlock(WA);
}
void JitArm64::mfsr(UGeckoInstruction inst)
@ -435,9 +420,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
switch (bit)
{
case CR_SO_BIT:
@ -456,8 +440,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62)
break;
}
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA);
return;
}
@ -468,9 +450,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
if (bit != CR_GT_BIT)
{
@ -502,9 +483,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
}
ORR(XA, XA, 32, 0, true); // XA | 1<<32
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA);
return;
}
@ -527,9 +505,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg out = i ? XB : XA;
bool negate = i ? negateB : negateA;
ARM64Reg WC = gpr.GetReg();
ARM64Reg XC = EncodeRegTo64(WC);
LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val[field]));
ARM64Reg XC = gpr.CR(field);
ARM64Reg WC = DecodeReg(XC);
switch (bit)
{
case CR_SO_BIT: // check bit 61 set
@ -557,7 +534,6 @@ void JitArm64::crXXX(UGeckoInstruction inst)
default:
_assert_msg_(DYNA_REC, false, "Invalid CR bit");
}
gpr.Unlock(WC);
}
// Compute combined bit
@ -585,7 +561,10 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WB);
WB = INVALID_REG;
gpr.BindCRToRegister(field, true);
XB = gpr.CR(field);
// Gross but necessary; if the input is totally zero and we set SO or LT,
// or even just add the (1<<32), GT will suddenly end up set without us
@ -623,10 +602,8 @@ void JitArm64::crXXX(UGeckoInstruction inst)
}
ORR(XB, XB, 32, 0, true); // XB | 1<<32
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA);
gpr.Unlock(WB);
}
void JitArm64::mfcr(UGeckoInstruction inst)
@ -634,14 +611,44 @@ void JitArm64::mfcr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.Lock(W0, W1, W2, W30);
BL(GetAsmRoutines()->mfcr);
gpr.Unlock(W1, W2, W30);
gpr.BindToRegister(inst.RD, false);
MOV(gpr.R(inst.RD), W0);
ARM64Reg WA = gpr.R(inst.RD);
ARM64Reg WC = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XC = EncodeRegTo64(WC);
gpr.Unlock(W0);
for (int i = 0; i < 8; i++)
{
ARM64Reg CR = gpr.CR(i);
ARM64Reg WCR = DecodeReg(CR);
// SO
if (i == 0)
{
UBFX(XA, CR, 61, 1);
}
else
{
UBFX(XC, CR, 61, 1);
ORR(XA, XC, XA, ArithOption(XA, ST_LSL, 4));
}
// EQ
ORR(WC, WA, 32 - 1, 0); // WA | 1<<1
CMP(WCR, WZR);
CSEL(WA, WC, WA, CC_EQ);
// GT
ORR(WC, WA, 32 - 2, 0); // WA | 1<<2
CMP(CR, ZR);
CSEL(WA, WC, WA, CC_GT);
// LT
UBFX(XC, CR, 62, 1);
ORR(WA, WA, WC, ArithOption(WC, ST_LSL, 3));
}
gpr.Unlock(WC);
}
void JitArm64::mtcrf(UGeckoInstruction inst)
@ -653,8 +660,6 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
if (crm != 0)
{
ARM64Reg RS = gpr.R(inst.RS);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg WB = gpr.GetReg();
ARM64Reg XB = EncodeRegTo64(WB);
MOVP2R(XB, m_crTable.data());
@ -662,20 +667,23 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
{
if ((crm & (0x80 >> i)) != 0)
{
gpr.BindCRToRegister(i, false);
ARM64Reg CR = gpr.CR(i);
ARM64Reg WCR = DecodeReg(CR);
if (i != 7)
LSR(WA, RS, 28 - i * 4);
LSR(WCR, RS, 28 - i * 4);
if (i != 0)
{
if (i != 7)
UBFX(WA, WA, 0, 4);
UBFX(WCR, WCR, 0, 4);
else
UBFX(WA, RS, 0, 4);
UBFX(WCR, RS, 0, 4);
}
LDR(XA, XB, ArithOption(XA, true));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[i]));
LDR(CR, XB, ArithOption(CR, true));
}
}
gpr.Unlock(WA, WB);
gpr.Unlock(WB);
}
}

View File

@ -651,47 +651,5 @@ void JitArm64::GenerateCommonAsm()
pairedStoreQuantized[30] = storeSingleS8Slow;
pairedStoreQuantized[31] = storeSingleS16Slow;
GetAsmRoutines()->mfcr = AlignCode16();
GenMfcr();
}
void JitArm64::GenMfcr()
{
// Input: Nothing
// Returns: W0
// Clobbers: X1, X2
const u8* start = GetCodePtr();
for (int i = 0; i < 8; i++)
{
LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val[i]));
// SO
if (i == 0)
{
UBFX(X0, X1, 61, 1);
}
else
{
ORR(W0, WZR, W0, ArithOption(W0, ST_LSL, 4));
UBFX(X2, X1, 61, 1);
ORR(X0, X0, X2);
}
// EQ
ORR(W2, W0, 32 - 1, 0); // W0 | 1<<1
CMP(W1, WZR);
CSEL(W0, W2, W0, CC_EQ);
// GT
ORR(W2, W0, 32 - 2, 0); // W0 | 1<<2
CMP(X1, ZR);
CSEL(W0, W2, W0, CC_GT);
// LT
UBFX(X2, X1, 62, 1);
ORR(W0, W0, W2, ArithOption(W2, ST_LSL, 3));
}
RET(X30);
JitRegister::Register(start, GetCodePtr(), "JIT_Mfcr");
GetAsmRoutines()->mfcr = nullptr;
}