Implement XER optimization on ARMv7 JIT core
Not completely optimized; there's room for improvement here.
This commit is contained in:
parent
5fce109ce1
commit
76697922b4
|
@ -46,51 +46,46 @@ void JitArm::ComputeRC(s32 value, int cr)
|
|||
// Records the host carry condition as the guest carry bit: the bit is ORR'd
// into tmp under CC_CS, BIC'd out of tmp under CC_CC, and tmp is then stored
// to the PPC state addressed relative to R9.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the diff hunk without +/- markers. The `mask` / LDR / STR lines on
// spr[SPR_XER] look like the removed form and the immediate-1 / STRB lines on
// xer_ca look like the added form -- confirm against the repository.
// NOTE(review): if that split is right, the added form has no load into tmp
// before the ORR/BIC, so only bit 0 of the stored byte would be defined --
// verify this is intended.
void JitArm::ComputeCarry()
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
// Following emitted instructions are conditional on carry set.
SetCC(CC_CS);
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
// Following emitted instructions are conditional on carry clear.
SetCC(CC_CC);
|
||||
BIC(tmp, tmp, mask);
|
||||
BIC(tmp, tmp, 1);
|
||||
// Back to the default condition for the store.
SetCC();
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Return the scratch register to the allocator.
gpr.Unlock(tmp);
|
||||
}
|
||||
|
||||
// Writes a carry value that is known when the code is generated: the `Carry`
// argument selects whether an ORR (set) or a BIC (clear) of the carry bit is
// emitted between the load and the store of the guest state.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the diff hunk without +/- markers (LDR/STR on spr[SPR_XER] with
// `mask` vs. LDRB/STRB on xer_ca with immediate 1). As listed here the
// if/else arms carry no braces, so the text is not meant to be compiled
// as-is -- confirm the real statement grouping against the repository.
void JitArm::ComputeCarry(bool Carry)
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Carry requested: set the bit.
if (Carry)
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
// No carry requested: clear the bit.
else
|
||||
BIC(tmp, tmp, mask);
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, 1);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Return the scratch register to the allocator.
gpr.Unlock(tmp);
|
||||
}
|
||||
|
||||
// Loads the stored guest carry state, ANDs the carry bit into `reg`, clears
// that bit in the loaded value with BIC, and stores the cleared value back.
// `reg` is the caller-supplied register that receives the isolated carry bit.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the diff hunk without +/- markers (LDR/STR on spr[SPR_XER] with
// `mask` vs. LDRB/STRB on xer_ca with immediate 1) -- confirm against the
// repository.
void JitArm::GetCarryAndClear(ARMReg reg)
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
AND(reg, tmp, mask);
|
||||
BIC(tmp, tmp, mask);
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// reg = carry bit only.
AND(reg, tmp, 1);
|
||||
// Clear the carry bit before writing the state back.
BIC(tmp, tmp, 1);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Return the scratch register to the allocator.
gpr.Unlock(tmp);
|
||||
}
|
||||
|
||||
// Merges a pending carry into the stored guest state: under CC_CS the carry
// bit is ORR'd into `reg`, then `reg` is ORR'd into the loaded state value and
// the result is stored back relative to R9.
// `reg` is modified by this sequence (it receives the carry bit when CS holds).
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the diff hunk without +/- markers (LDR/STR on spr[SPR_XER] with
// `mask` vs. LDRB/STRB on xer_ca with immediate 1) -- confirm against the
// repository.
void JitArm::FinalizeCarry(ARMReg reg)
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
// Following emitted instruction(s) are conditional on carry set.
SetCC(CC_CS);
|
||||
ORR(reg, reg, mask);
|
||||
ORR(reg, reg, 1);
|
||||
// Back to the default condition for the load/merge/store.
SetCC();
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Combine the stored value with the carry accumulated in reg.
ORR(tmp, tmp, reg);
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// Return the scratch register to the allocator.
gpr.Unlock(tmp);
|
||||
}
|
||||
|
||||
|
@ -107,25 +102,23 @@ void JitArm::subfic(UGeckoInstruction inst)
|
|||
if (imm == 0)
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, mask);
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
BIC(tmp, tmp, 1);
|
||||
// Flags act exactly like subtracting from 0
|
||||
RSBS(gpr.R(d), gpr.R(d), 0);
|
||||
SetCC(CC_CS);
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
SetCC();
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp);
|
||||
}
|
||||
else if (imm == -1)
|
||||
{
|
||||
// CA is always set in this case
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
ORR(tmp, tmp, mask);
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
ORR(tmp, tmp, 1);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp);
|
||||
|
||||
MVN(gpr.R(d), gpr.R(d));
|
||||
|
@ -134,35 +127,33 @@ void JitArm::subfic(UGeckoInstruction inst)
|
|||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
ARMReg rA = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
MOVI2R(rA, imm + 1);
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, mask);
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
BIC(tmp, tmp, 1);
|
||||
// Flags act exactly like subtracting from 0
|
||||
MVN(gpr.R(d), gpr.R(d));
|
||||
ADDS(gpr.R(d), gpr.R(d), rA);
|
||||
// Output carry is inverted
|
||||
SetCC(CC_CS);
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
SetCC();
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp, rA);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
MOVI2R(gpr.R(d), imm);
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, mask);
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
BIC(tmp, tmp, 1);
|
||||
// Flags act exactly like subtracting from 0
|
||||
SUBS(gpr.R(d), gpr.R(d), gpr.R(a));
|
||||
// Output carry is inverted
|
||||
SetCC(CC_CS);
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
SetCC();
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp);
|
||||
}
|
||||
// This instruction has no RC flag
|
||||
|
@ -871,7 +862,6 @@ void JitArm::srawix(UGeckoInstruction inst)
|
|||
ARMReg RA = gpr.R(a);
|
||||
ARMReg RS = gpr.R(s);
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
|
||||
MOV(tmp, RS);
|
||||
ASR(RA, RS, amount);
|
||||
|
@ -880,12 +870,12 @@ void JitArm::srawix(UGeckoInstruction inst)
|
|||
LSL(tmp, tmp, 32 - amount);
|
||||
TST(tmp, RA);
|
||||
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, mask);
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
BIC(tmp, tmp, 1);
|
||||
SetCC(CC_NEQ);
|
||||
ORR(tmp, tmp, mask);
|
||||
ORR(tmp, tmp, 1);
|
||||
SetCC();
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp);
|
||||
}
|
||||
else
|
||||
|
@ -895,10 +885,9 @@ void JitArm::srawix(UGeckoInstruction inst)
|
|||
MOV(RA, RS);
|
||||
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
BIC(tmp, tmp, mask);
|
||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
||||
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
BIC(tmp, tmp, 1);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
gpr.Unlock(tmp);
|
||||
|
||||
}
|
||||
|
|
|
@ -76,11 +76,10 @@ void JitArm::mtspr(UGeckoInstruction inst)
|
|||
case SPR_SRR0:
|
||||
case SPR_SRR1:
|
||||
// These are safe to do the easy way, see the bottom of this function.
|
||||
break;
|
||||
break;
|
||||
|
||||
case SPR_LR:
|
||||
case SPR_CTR:
|
||||
case SPR_XER:
|
||||
case SPR_GQR0:
|
||||
case SPR_GQR0 + 1:
|
||||
case SPR_GQR0 + 2:
|
||||
|
@ -90,8 +89,23 @@ void JitArm::mtspr(UGeckoInstruction inst)
|
|||
case SPR_GQR0 + 6:
|
||||
case SPR_GQR0 + 7:
|
||||
// These are safe to do the easy way, see the bottom of this function.
|
||||
break;
|
||||
|
||||
break;
|
||||
// Write to XER: the source register value is split into three separately
// stored fields instead of being written as one word.
//   - value AND 0xFF7F           -> xer_stringctrl (halfword store)
//   - (value >> XER_CA_SHIFT) & 1 -> xer_ca        (byte store)
//   - value >> XER_OV_SHIFT       -> xer_so_ov     (byte store)
// NOTE(review): the exact bit meaning of each field depends on the
// XER_CA_SHIFT / XER_OV_SHIFT definitions, which are not visible here.
case SPR_XER:
|
||||
{
|
||||
ARMReg RD = gpr.R(inst.RD);
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
ARMReg mask = gpr.GetReg();
|
||||
// Mask is materialised in a register and applied to the source value.
MOVI2R(mask, 0xFF7F);
|
||||
AND(tmp, RD, mask);
|
||||
STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl));
|
||||
// Isolate the single carry bit.
LSR(tmp, RD, XER_CA_SHIFT);
|
||||
AND(tmp, tmp, 1);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
// No AND here: the byte store keeps whatever bits remain after the shift.
LSR(tmp, RD, XER_OV_SHIFT);
|
||||
STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
|
||||
// Release both scratch registers.
gpr.Unlock(tmp, mask);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
FALLBACK_IF(true);
|
||||
}
|
||||
|
@ -116,6 +130,20 @@ void JitArm::mfspr(UGeckoInstruction inst)
|
|||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||
switch (iIndex)
|
||||
{
|
||||
// Read of XER: the destination register is rebuilt from the three separately
// stored fields.
//   RD  = xer_stringctrl (halfword load)
//   RD |= xer_ca    << XER_CA_SHIFT
//   RD |= xer_so_ov << XER_OV_SHIFT
// This is the inverse of the field split performed when XER is written in
// mtspr.
case SPR_XER:
|
||||
{
|
||||
ARMReg RD = gpr.R(inst.RD);
|
||||
ARMReg tmp = gpr.GetReg();
|
||||
// Start from the halfword field.
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
|
||||
// Move the carry byte to its bit position and merge it in.
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||
LSL(tmp, tmp, XER_CA_SHIFT);
|
||||
ORR(RD, RD, tmp);
|
||||
// Same for the xer_so_ov byte.
LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
|
||||
LSL(tmp, tmp, XER_OV_SHIFT);
|
||||
ORR(RD, RD, tmp);
|
||||
// Release the scratch register.
gpr.Unlock(tmp);
|
||||
}
|
||||
break;
|
||||
case SPR_WPAR:
|
||||
case SPR_DEC:
|
||||
case SPR_TL:
|
||||
|
|
Loading…
Reference in New Issue