Merge pull request #1070 from FioraAeterna/optimizeca4

JIT: Reorganize carry to store flags separately instead of as part of XER
This commit is contained in:
Ryan Houdek 2014-09-24 14:31:58 -05:00
commit 0b6ec44e42
12 changed files with 165 additions and 119 deletions

View File

@ -335,16 +335,17 @@ union UFPR
#define XER_CA_SHIFT 29 #define XER_CA_SHIFT 29
#define XER_OV_SHIFT 30 #define XER_OV_SHIFT 30
#define XER_SO_SHIFT 31 #define XER_SO_SHIFT 31
#define XER_CA_MASK (1U << XER_CA_SHIFT) #define XER_OV_MASK 1
#define XER_OV_MASK (1U << XER_OV_SHIFT) #define XER_SO_MASK 2
#define XER_SO_MASK (1U << XER_SO_SHIFT)
// XER // XER
union UReg_XER union UReg_XER
{ {
struct struct
{ {
u32 BYTE_COUNT : 7; u32 BYTE_COUNT : 7;
u32 : 22; u32 : 1;
u32 BYTE_CMP : 8;
u32 : 13;
u32 CA : 1; u32 CA : 1;
u32 OV : 1; u32 OV : 1;
u32 SO : 1; u32 SO : 1;

View File

@ -499,7 +499,7 @@ void Interpreter::lhzx(UGeckoInstruction _inst)
void Interpreter::lswx(UGeckoInstruction _inst) void Interpreter::lswx(UGeckoInstruction _inst)
{ {
u32 EA = Helper_Get_EA_X(_inst); u32 EA = Helper_Get_EA_X(_inst);
u32 n = rSPR(SPR_XER) & 0x7F; u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
int r = _inst.RD; int r = _inst.RD;
int i = 0; int i = 0;
@ -727,7 +727,7 @@ void Interpreter::stswi(UGeckoInstruction _inst)
void Interpreter::stswx(UGeckoInstruction _inst) void Interpreter::stswx(UGeckoInstruction _inst)
{ {
u32 EA = Helper_Get_EA_X(_inst); u32 EA = Helper_Get_EA_X(_inst);
u32 n = rSPR(SPR_XER) & 0x7F; u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
int r = _inst.RS; int r = _inst.RS;
int i = 0; int i = 0;

View File

@ -112,9 +112,9 @@ void Interpreter::mtfsfx(UGeckoInstruction _inst)
void Interpreter::mcrxr(UGeckoInstruction _inst) void Interpreter::mcrxr(UGeckoInstruction _inst)
{ {
// USES_XER SetCRField(_inst.CRFD, GetXER().Hex >> 28);
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28); PowerPC::ppcState.xer_ca = 0;
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3 PowerPC::ppcState.xer_so_ov = 0;
} }
void Interpreter::mfcr(UGeckoInstruction _inst) void Interpreter::mfcr(UGeckoInstruction _inst)
@ -235,6 +235,9 @@ void Interpreter::mfspr(UGeckoInstruction _inst)
rSPR(iIndex) &= ~1; rSPR(iIndex) &= ~1;
} }
break; break;
case SPR_XER:
rSPR(iIndex) = GetXER().Hex;
break;
} }
m_GPR[_inst.RD] = rSPR(iIndex); m_GPR[_inst.RD] = rSPR(iIndex);
} }
@ -350,6 +353,10 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
case SPR_SDR: case SPR_SDR:
Memory::SDRUpdated(); Memory::SDRUpdated();
break; break;
case SPR_XER:
SetXER(rSPR(iIndex));
break;
} }
} }

View File

@ -22,12 +22,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
if (overflow) if (overflow)
{ {
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
} }
else else
{ {
//XER[OV] = 0 //XER[OV] = 0
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
} }
} }
@ -36,11 +36,11 @@ void Jit64::GenerateOverflow()
{ {
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
//XER[OV] = 0 //XER[OV] = 0
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
SetJumpTarget(exit); SetJumpTarget(exit);
} }
@ -60,7 +60,7 @@ void Jit64::FinalizeCarry(CCFlags cond)
{ {
// convert the condition to a carry flag (is there a better way?) // convert the condition to a carry flag (is there a better way?)
SETcc(cond, R(RSCRATCH)); SETcc(cond, R(RSCRATCH));
BT(8, R(RSCRATCH), Imm8(0)); SHR(8, R(RSCRATCH), Imm8(1));
} }
js.carryFlagSet = true; js.carryFlagSet = true;
} }
@ -92,23 +92,22 @@ void Jit64::FinalizeCarry(bool ca)
} }
else else
{ {
JitClearCAOV(true, false); JitClearCA();
} }
} }
} }
// Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool oe, bool inv) void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
{ {
// USES_XER
if (oe) if (oe)
{ {
// Make sure not to lose the carry flags (not a big deal, this path is rare). // Make sure not to lose the carry flags (not a big deal, this path is rare).
PUSHF(); PUSHF();
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK))); //XER[OV] = 0
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_SO_MASK | XER_OV_MASK));
SetJumpTarget(jno); SetJumpTarget(jno);
POPF(); POPF();
} }
@ -1792,27 +1791,15 @@ void Jit64::srawix(UGeckoInstruction inst)
if (a != s) if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH)); MOV(32, gpr.R(a), R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops // some optimized common cases that can be done in slightly fewer ops
if (amount == 31) if (amount == 1)
{ {
JitSetCA(); SHR(32, R(RSCRATCH), Imm8(31)); // sign
SAR(32, gpr.R(a), Imm8(31)); AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(true, false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1)); SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
} }
else else
{ {
JitClearCAOV(true, false);
SAR(32, gpr.R(a), Imm8(amount)); SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount)); SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a)); TEST(32, R(RSCRATCH), gpr.R(a));

View File

@ -140,7 +140,6 @@ void Jit64::mtspr(UGeckoInstruction inst)
case SPR_LR: case SPR_LR:
case SPR_CTR: case SPR_CTR:
case SPR_XER:
// These are safe to do the easy way, see the bottom of this function. // These are safe to do the easy way, see the bottom of this function.
break; break;
@ -155,6 +154,24 @@ void Jit64::mtspr(UGeckoInstruction inst)
// These are safe to do the easy way, see the bottom of this function. // These are safe to do the easy way, see the bottom of this function.
break; break;
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, true, false);
MOV(32, R(RSCRATCH), gpr.R(d));
AND(32, R(RSCRATCH), Imm32(0xff7f));
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(8, R(RSCRATCH), Imm8(1));
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
MOV(32, R(RSCRATCH), gpr.R(d));
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
gpr.UnlockAll();
return;
default: default:
FALLBACK_IF(true); FALLBACK_IF(true);
} }
@ -238,6 +255,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
gpr.UnlockAllX(); gpr.UnlockAllX();
break; break;
} }
case SPR_XER:
gpr.Lock(d);
gpr.BindToRegister(d, false);
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
OR(32, gpr.R(d), R(RSCRATCH));
break;
case SPR_WPAR: case SPR_WPAR:
case SPR_DEC: case SPR_DEC:
case SPR_PMC1: case SPR_PMC1:
@ -422,17 +451,20 @@ void Jit64::mcrxr(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
// USES_CR
// Copy XER[0-3] into CR[inst.CRFD] // Copy XER[0-3] into CR[inst.CRFD]
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER])); MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
SHR(32, R(RSCRATCH), Imm8(28)); MOVZX(32, 8, RSCRATCH2, PPCSTATE(xer_so_ov));
// [0 SO OV CA]
LEA(32, RSCRATCH, MComplex(RSCRATCH, RSCRATCH2, SCALE_2, 0));
// [SO OV CA 0] << 3
SHL(32, R(RSCRATCH), Imm8(4));
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
// Clear XER[0-3] // Clear XER[0-3]
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); MOV(8, PPCSTATE(xer_ca), Imm8(0));
MOV(8, PPCSTATE(xer_so_ov), Imm8(0));
} }
void Jit64::crXXX(UGeckoInstruction inst) void Jit64::crXXX(UGeckoInstruction inst)

View File

@ -1027,9 +1027,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER])); Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca));
Jit->SHR(32, R(reg), Imm8(29));
Jit->AND(32, R(reg), Imm8(1));
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -1107,7 +1105,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA(); Jit->JitSetCA();
FixupBranch cont = Jit->J(); FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry); Jit->SetJumpTarget(nocarry);
Jit->JitClearCAOV(true, false); Jit->JitClearCA();
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;

View File

@ -46,51 +46,46 @@ void JitArm::ComputeRC(s32 value, int cr)
void JitArm::ComputeCarry() void JitArm::ComputeCarry()
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
SetCC(CC_CS); SetCC(CC_CS);
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
SetCC(CC_CC); SetCC(CC_CC);
BIC(tmp, tmp, mask); BIC(tmp, tmp, 1);
SetCC(); SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
void JitArm::ComputeCarry(bool Carry) void JitArm::ComputeCarry(bool Carry)
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
if (Carry) if (Carry)
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
else else
BIC(tmp, tmp, mask); BIC(tmp, tmp, 1);
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
void JitArm::GetCarryAndClear(ARMReg reg) void JitArm::GetCarryAndClear(ARMReg reg)
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); AND(reg, tmp, 1);
AND(reg, tmp, mask); BIC(tmp, tmp, 1);
BIC(tmp, tmp, mask); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
void JitArm::FinalizeCarry(ARMReg reg) void JitArm::FinalizeCarry(ARMReg reg)
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
SetCC(CC_CS); SetCC(CC_CS);
ORR(reg, reg, mask); ORR(reg, reg, 1);
SetCC(); SetCC();
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
ORR(tmp, tmp, reg); ORR(tmp, tmp, reg);
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
@ -107,25 +102,23 @@ void JitArm::subfic(UGeckoInstruction inst)
if (imm == 0) if (imm == 0)
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); BIC(tmp, tmp, 1);
BIC(tmp, tmp, mask);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
RSBS(gpr.R(d), gpr.R(d), 0); RSBS(gpr.R(d), gpr.R(d), 0);
SetCC(CC_CS); SetCC(CC_CS);
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
SetCC(); SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
else if (imm == -1) else if (imm == -1)
{ {
// CA is always set in this case // CA is always set in this case
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); ORR(tmp, tmp, 1);
ORR(tmp, tmp, mask); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
gpr.Unlock(tmp); gpr.Unlock(tmp);
MVN(gpr.R(d), gpr.R(d)); MVN(gpr.R(d), gpr.R(d));
@ -134,35 +127,33 @@ void JitArm::subfic(UGeckoInstruction inst)
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOVI2R(rA, imm + 1); MOVI2R(rA, imm + 1);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, mask); BIC(tmp, tmp, 1);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
MVN(gpr.R(d), gpr.R(d)); MVN(gpr.R(d), gpr.R(d));
ADDS(gpr.R(d), gpr.R(d), rA); ADDS(gpr.R(d), gpr.R(d), rA);
// Output carry is inverted // Output carry is inverted
SetCC(CC_CS); SetCC(CC_CS);
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
SetCC(); SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp, rA); gpr.Unlock(tmp, rA);
} }
} }
else else
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOVI2R(gpr.R(d), imm); MOVI2R(gpr.R(d), imm);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, mask); BIC(tmp, tmp, 1);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
SUBS(gpr.R(d), gpr.R(d), gpr.R(a)); SUBS(gpr.R(d), gpr.R(d), gpr.R(a));
// Output carry is inverted // Output carry is inverted
SetCC(CC_CS); SetCC(CC_CS);
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
SetCC(); SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
// This instruction has no RC flag // This instruction has no RC flag
@ -871,7 +862,6 @@ void JitArm::srawix(UGeckoInstruction inst)
ARMReg RA = gpr.R(a); ARMReg RA = gpr.R(a);
ARMReg RS = gpr.R(s); ARMReg RS = gpr.R(s);
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
MOV(tmp, RS); MOV(tmp, RS);
ASR(RA, RS, amount); ASR(RA, RS, amount);
@ -880,12 +870,12 @@ void JitArm::srawix(UGeckoInstruction inst)
LSL(tmp, tmp, 32 - amount); LSL(tmp, tmp, 32 - amount);
TST(tmp, RA); TST(tmp, RA);
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, mask); BIC(tmp, tmp, 1);
SetCC(CC_NEQ); SetCC(CC_NEQ);
ORR(tmp, tmp, mask); ORR(tmp, tmp, 1);
SetCC(); SetCC();
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }
else else
@ -895,10 +885,9 @@ void JitArm::srawix(UGeckoInstruction inst)
MOV(RA, RS); MOV(RA, RS);
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
Operand2 mask = Operand2(2, 2); // XER_CA_MASK LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); BIC(tmp, tmp, 1);
BIC(tmp, tmp, mask); STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
gpr.Unlock(tmp); gpr.Unlock(tmp);
} }

View File

@ -76,11 +76,10 @@ void JitArm::mtspr(UGeckoInstruction inst)
case SPR_SRR0: case SPR_SRR0:
case SPR_SRR1: case SPR_SRR1:
// These are safe to do the easy way, see the bottom of this function. // These are safe to do the easy way, see the bottom of this function.
break; break;
case SPR_LR: case SPR_LR:
case SPR_CTR: case SPR_CTR:
case SPR_XER:
case SPR_GQR0: case SPR_GQR0:
case SPR_GQR0 + 1: case SPR_GQR0 + 1:
case SPR_GQR0 + 2: case SPR_GQR0 + 2:
@ -90,8 +89,23 @@ void JitArm::mtspr(UGeckoInstruction inst)
case SPR_GQR0 + 6: case SPR_GQR0 + 6:
case SPR_GQR0 + 7: case SPR_GQR0 + 7:
// These are safe to do the easy way, see the bottom of this function. // These are safe to do the easy way, see the bottom of this function.
break; break;
case SPR_XER:
{
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
ARMReg mask = gpr.GetReg();
MOVI2R(mask, 0xFF7F);
AND(tmp, RD, mask);
STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl));
LSR(tmp, RD, XER_CA_SHIFT);
AND(tmp, tmp, 1);
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LSR(tmp, RD, XER_OV_SHIFT);
STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
gpr.Unlock(tmp, mask);
}
break;
default: default:
FALLBACK_IF(true); FALLBACK_IF(true);
} }
@ -116,6 +130,20 @@ void JitArm::mfspr(UGeckoInstruction inst)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex) switch (iIndex)
{ {
case SPR_XER:
{
ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg();
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
LSL(tmp, tmp, XER_CA_SHIFT);
ORR(RD, RD, tmp);
LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
LSL(tmp, tmp, XER_OV_SHIFT);
ORR(RD, RD, tmp);
gpr.Unlock(tmp);
}
break;
case SPR_WPAR: case SPR_WPAR:
case SPR_DEC: case SPR_DEC:
case SPR_TL: case SPR_TL:

View File

@ -896,30 +896,23 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
void EmuCodeBlock::JitGetAndClearCAOV(bool oe) void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
{ {
if (oe) if (oe)
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0 AND(8, PPCSTATE(xer_so_ov), Imm32(~XER_OV_MASK)); //XER.OV = 0
BTR(32, PPCSTATE(spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0 SHR(8, PPCSTATE(xer_ca), Imm8(1)); //carry = XER.CA, XER.CA = 0
} }
void EmuCodeBlock::JitSetCA() void EmuCodeBlock::JitSetCA()
{ {
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 MOV(8, PPCSTATE(xer_ca), Imm8(1)); //XER.CA = 1
} }
// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so // Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
// branchless calculation of CA is probably faster in general. // branchless calculation of CA is probably faster in general.
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode) void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
{ {
SETcc(conditionCode, R(RSCRATCH)); SETcc(conditionCode, PPCSTATE(xer_ca));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
} }
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe) void EmuCodeBlock::JitClearCA()
{ {
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF); MOV(8, PPCSTATE(xer_ca), Imm8(0));
if (mask == 0xFFFFFFFF)
return;
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
} }

View File

@ -121,7 +121,7 @@ public:
void JitGetAndClearCAOV(bool oe); void JitGetAndClearCAOV(bool oe);
void JitSetCA(); void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode); void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool ca, bool oe); void JitClearCA();
void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm);

View File

@ -61,6 +61,12 @@ struct GC_ALIGNED64(PowerPCState)
// This variable should be inside of the CoreTiming namespace if we wanted to be correct. // This variable should be inside of the CoreTiming namespace if we wanted to be correct.
int downcount; int downcount;
// XER, reformatted into byte fields for easier access.
u8 xer_ca;
u8 xer_so_ov; // format: (SO << 1) | OV
// The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support lscbx
u16 xer_stringctrl;
#if _M_X86_64 #if _M_X86_64
// This member exists for the purpose of an assertion in x86 JitBase.cpp // This member exists for the purpose of an assertion in x86 JitBase.cpp
// that its offset <= 0x100. To minimize code size on x86, we want as much // that its offset <= 0x100. To minimize code size on x86, we want as much
@ -252,35 +258,40 @@ inline u32 GetCR()
return PowerPC::CompactCR(); return PowerPC::CompactCR();
} }
// SetCarry/GetCarry may speed up soon.
inline void SetCarry(int ca) inline void SetCarry(int ca)
{ {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca; PowerPC::ppcState.xer_ca = ca;
} }
inline int GetCarry() inline int GetCarry()
{ {
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA; return PowerPC::ppcState.xer_ca;
} }
inline UReg_XER GetXER() inline UReg_XER GetXER()
{ {
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]); u32 xer = 0;
xer |= PowerPC::ppcState.xer_stringctrl;
xer |= PowerPC::ppcState.xer_ca << XER_CA_SHIFT;
xer |= PowerPC::ppcState.xer_so_ov << XER_OV_SHIFT;
return xer;
} }
inline void SetXER(UReg_XER new_xer) inline void SetXER(UReg_XER new_xer)
{ {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer; PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8);
PowerPC::ppcState.xer_ca = new_xer.CA;
PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV;
} }
inline int GetXER_SO() inline int GetXER_SO()
{ {
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO; return PowerPC::ppcState.xer_so_ov >> 1;
} }
inline void SetXER_SO(int value) inline void SetXER_SO(int value)
{ {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value; PowerPC::ppcState.xer_so_ov |= value << 1;
} }
void UpdateFPRF(double dvalue); void UpdateFPRF(double dvalue);

View File

@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread; static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system // Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 33; static const u32 STATE_VERSION = 34;
enum enum
{ {