Merge pull request #1070 from FioraAeterna/optimizeca4
JIT: Reorganize carry to store flags separately instead of part of XER
This commit is contained in:
commit
0b6ec44e42
|
@ -335,16 +335,17 @@ union UFPR
|
||||||
#define XER_CA_SHIFT 29
|
#define XER_CA_SHIFT 29
|
||||||
#define XER_OV_SHIFT 30
|
#define XER_OV_SHIFT 30
|
||||||
#define XER_SO_SHIFT 31
|
#define XER_SO_SHIFT 31
|
||||||
#define XER_CA_MASK (1U << XER_CA_SHIFT)
|
#define XER_OV_MASK 1
|
||||||
#define XER_OV_MASK (1U << XER_OV_SHIFT)
|
#define XER_SO_MASK 2
|
||||||
#define XER_SO_MASK (1U << XER_SO_SHIFT)
|
|
||||||
// XER
|
// XER
|
||||||
union UReg_XER
|
union UReg_XER
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
u32 BYTE_COUNT : 7;
|
u32 BYTE_COUNT : 7;
|
||||||
u32 : 22;
|
u32 : 1;
|
||||||
|
u32 BYTE_CMP : 8;
|
||||||
|
u32 : 13;
|
||||||
u32 CA : 1;
|
u32 CA : 1;
|
||||||
u32 OV : 1;
|
u32 OV : 1;
|
||||||
u32 SO : 1;
|
u32 SO : 1;
|
||||||
|
|
|
@ -499,7 +499,7 @@ void Interpreter::lhzx(UGeckoInstruction _inst)
|
||||||
void Interpreter::lswx(UGeckoInstruction _inst)
|
void Interpreter::lswx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 EA = Helper_Get_EA_X(_inst);
|
u32 EA = Helper_Get_EA_X(_inst);
|
||||||
u32 n = rSPR(SPR_XER) & 0x7F;
|
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
|
||||||
int r = _inst.RD;
|
int r = _inst.RD;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
@ -727,7 +727,7 @@ void Interpreter::stswi(UGeckoInstruction _inst)
|
||||||
void Interpreter::stswx(UGeckoInstruction _inst)
|
void Interpreter::stswx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 EA = Helper_Get_EA_X(_inst);
|
u32 EA = Helper_Get_EA_X(_inst);
|
||||||
u32 n = rSPR(SPR_XER) & 0x7F;
|
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
|
||||||
int r = _inst.RS;
|
int r = _inst.RS;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
|
|
@ -112,9 +112,9 @@ void Interpreter::mtfsfx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void Interpreter::mcrxr(UGeckoInstruction _inst)
|
void Interpreter::mcrxr(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
// USES_XER
|
SetCRField(_inst.CRFD, GetXER().Hex >> 28);
|
||||||
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
|
PowerPC::ppcState.xer_ca = 0;
|
||||||
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
|
PowerPC::ppcState.xer_so_ov = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Interpreter::mfcr(UGeckoInstruction _inst)
|
void Interpreter::mfcr(UGeckoInstruction _inst)
|
||||||
|
@ -235,6 +235,9 @@ void Interpreter::mfspr(UGeckoInstruction _inst)
|
||||||
rSPR(iIndex) &= ~1;
|
rSPR(iIndex) &= ~1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case SPR_XER:
|
||||||
|
rSPR(iIndex) = GetXER().Hex;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
m_GPR[_inst.RD] = rSPR(iIndex);
|
m_GPR[_inst.RD] = rSPR(iIndex);
|
||||||
}
|
}
|
||||||
|
@ -350,6 +353,10 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
|
||||||
case SPR_SDR:
|
case SPR_SDR:
|
||||||
Memory::SDRUpdated();
|
Memory::SDRUpdated();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SPR_XER:
|
||||||
|
SetXER(rSPR(iIndex));
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,12 +22,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
|
||||||
if (overflow)
|
if (overflow)
|
||||||
{
|
{
|
||||||
//XER[OV/SO] = 1
|
//XER[OV/SO] = 1
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//XER[OV] = 0
|
//XER[OV] = 0
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
|
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,11 +36,11 @@ void Jit64::GenerateOverflow()
|
||||||
{
|
{
|
||||||
FixupBranch jno = J_CC(CC_NO);
|
FixupBranch jno = J_CC(CC_NO);
|
||||||
//XER[OV/SO] = 1
|
//XER[OV/SO] = 1
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
|
||||||
FixupBranch exit = J();
|
FixupBranch exit = J();
|
||||||
SetJumpTarget(jno);
|
SetJumpTarget(jno);
|
||||||
//XER[OV] = 0
|
//XER[OV] = 0
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
|
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||||
SetJumpTarget(exit);
|
SetJumpTarget(exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ void Jit64::FinalizeCarry(CCFlags cond)
|
||||||
{
|
{
|
||||||
// convert the condition to a carry flag (is there a better way?)
|
// convert the condition to a carry flag (is there a better way?)
|
||||||
SETcc(cond, R(RSCRATCH));
|
SETcc(cond, R(RSCRATCH));
|
||||||
BT(8, R(RSCRATCH), Imm8(0));
|
SHR(8, R(RSCRATCH), Imm8(1));
|
||||||
}
|
}
|
||||||
js.carryFlagSet = true;
|
js.carryFlagSet = true;
|
||||||
}
|
}
|
||||||
|
@ -92,23 +92,22 @@ void Jit64::FinalizeCarry(bool ca)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
JitClearCAOV(true, false);
|
JitClearCA();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assumes CA,OV are clear
|
|
||||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||||
{
|
{
|
||||||
// USES_XER
|
|
||||||
if (oe)
|
if (oe)
|
||||||
{
|
{
|
||||||
// Make sure not to lose the carry flags (not a big deal, this path is rare).
|
// Make sure not to lose the carry flags (not a big deal, this path is rare).
|
||||||
PUSHF();
|
PUSHF();
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
|
//XER[OV] = 0
|
||||||
|
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||||
FixupBranch jno = J_CC(CC_NO);
|
FixupBranch jno = J_CC(CC_NO);
|
||||||
//XER[OV/SO] = 1
|
//XER[OV/SO] = 1
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_SO_MASK | XER_OV_MASK));
|
||||||
SetJumpTarget(jno);
|
SetJumpTarget(jno);
|
||||||
POPF();
|
POPF();
|
||||||
}
|
}
|
||||||
|
@ -1792,27 +1791,15 @@ void Jit64::srawix(UGeckoInstruction inst)
|
||||||
if (a != s)
|
if (a != s)
|
||||||
MOV(32, gpr.R(a), R(RSCRATCH));
|
MOV(32, gpr.R(a), R(RSCRATCH));
|
||||||
// some optimized common cases that can be done in slightly fewer ops
|
// some optimized common cases that can be done in slightly fewer ops
|
||||||
if (amount == 31)
|
if (amount == 1)
|
||||||
{
|
{
|
||||||
JitSetCA();
|
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
||||||
SAR(32, gpr.R(a), Imm8(31));
|
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
||||||
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
|
|
||||||
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
|
||||||
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
|
|
||||||
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
|
|
||||||
}
|
|
||||||
else if (amount == 1)
|
|
||||||
{
|
|
||||||
JitClearCAOV(true, false);
|
|
||||||
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
|
||||||
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
|
||||||
SAR(32, gpr.R(a), Imm8(1));
|
SAR(32, gpr.R(a), Imm8(1));
|
||||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
JitClearCAOV(true, false);
|
|
||||||
SAR(32, gpr.R(a), Imm8(amount));
|
SAR(32, gpr.R(a), Imm8(amount));
|
||||||
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
||||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||||
|
|
|
@ -140,7 +140,6 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
||||||
|
|
||||||
case SPR_LR:
|
case SPR_LR:
|
||||||
case SPR_CTR:
|
case SPR_CTR:
|
||||||
case SPR_XER:
|
|
||||||
// These are safe to do the easy way, see the bottom of this function.
|
// These are safe to do the easy way, see the bottom of this function.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -155,6 +154,24 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
||||||
// These are safe to do the easy way, see the bottom of this function.
|
// These are safe to do the easy way, see the bottom of this function.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SPR_XER:
|
||||||
|
gpr.Lock(d);
|
||||||
|
gpr.BindToRegister(d, true, false);
|
||||||
|
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0xff7f));
|
||||||
|
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
|
||||||
|
|
||||||
|
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||||
|
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||||
|
AND(8, R(RSCRATCH), Imm8(1));
|
||||||
|
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
|
||||||
|
|
||||||
|
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||||
|
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||||
|
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
return;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
FALLBACK_IF(true);
|
FALLBACK_IF(true);
|
||||||
}
|
}
|
||||||
|
@ -238,6 +255,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case SPR_XER:
|
||||||
|
gpr.Lock(d);
|
||||||
|
gpr.BindToRegister(d, false);
|
||||||
|
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
|
||||||
|
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
|
||||||
|
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||||
|
OR(32, gpr.R(d), R(RSCRATCH));
|
||||||
|
|
||||||
|
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
|
||||||
|
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||||
|
OR(32, gpr.R(d), R(RSCRATCH));
|
||||||
|
break;
|
||||||
case SPR_WPAR:
|
case SPR_WPAR:
|
||||||
case SPR_DEC:
|
case SPR_DEC:
|
||||||
case SPR_PMC1:
|
case SPR_PMC1:
|
||||||
|
@ -422,17 +451,20 @@ void Jit64::mcrxr(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITSystemRegistersOff);
|
JITDISABLE(bJITSystemRegistersOff);
|
||||||
|
|
||||||
// USES_CR
|
|
||||||
|
|
||||||
// Copy XER[0-3] into CR[inst.CRFD]
|
// Copy XER[0-3] into CR[inst.CRFD]
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
|
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
|
||||||
SHR(32, R(RSCRATCH), Imm8(28));
|
MOVZX(32, 8, RSCRATCH2, PPCSTATE(xer_so_ov));
|
||||||
|
// [0 SO OV CA]
|
||||||
|
LEA(32, RSCRATCH, MComplex(RSCRATCH, RSCRATCH2, SCALE_2, 0));
|
||||||
|
// [SO OV CA 0] << 3
|
||||||
|
SHL(32, R(RSCRATCH), Imm8(4));
|
||||||
|
|
||||||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
|
MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
|
||||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
||||||
|
|
||||||
// Clear XER[0-3]
|
// Clear XER[0-3]
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF));
|
MOV(8, PPCSTATE(xer_ca), Imm8(0));
|
||||||
|
MOV(8, PPCSTATE(xer_so_ov), Imm8(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::crXXX(UGeckoInstruction inst)
|
void Jit64::crXXX(UGeckoInstruction inst)
|
||||||
|
|
|
@ -1027,9 +1027,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
X64Reg reg = regFindFreeReg(RI);
|
X64Reg reg = regFindFreeReg(RI);
|
||||||
Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER]));
|
Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca));
|
||||||
Jit->SHR(32, R(reg), Imm8(29));
|
|
||||||
Jit->AND(32, R(reg), Imm8(1));
|
|
||||||
RI.regs[reg] = I;
|
RI.regs[reg] = I;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1107,7 +1105,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||||
Jit->JitSetCA();
|
Jit->JitSetCA();
|
||||||
FixupBranch cont = Jit->J();
|
FixupBranch cont = Jit->J();
|
||||||
Jit->SetJumpTarget(nocarry);
|
Jit->SetJumpTarget(nocarry);
|
||||||
Jit->JitClearCAOV(true, false);
|
Jit->JitClearCA();
|
||||||
Jit->SetJumpTarget(cont);
|
Jit->SetJumpTarget(cont);
|
||||||
regNormalRegClear(RI, I);
|
regNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -46,51 +46,46 @@ void JitArm::ComputeRC(s32 value, int cr)
|
||||||
void JitArm::ComputeCarry()
|
void JitArm::ComputeCarry()
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
|
||||||
SetCC(CC_CS);
|
SetCC(CC_CS);
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
SetCC(CC_CC);
|
SetCC(CC_CC);
|
||||||
BIC(tmp, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm::ComputeCarry(bool Carry)
|
void JitArm::ComputeCarry(bool Carry)
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
|
||||||
if (Carry)
|
if (Carry)
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
else
|
else
|
||||||
BIC(tmp, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm::GetCarryAndClear(ARMReg reg)
|
void JitArm::GetCarryAndClear(ARMReg reg)
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
AND(reg, tmp, 1);
|
||||||
AND(reg, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
BIC(tmp, tmp, mask);
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm::FinalizeCarry(ARMReg reg)
|
void JitArm::FinalizeCarry(ARMReg reg)
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
|
||||||
SetCC(CC_CS);
|
SetCC(CC_CS);
|
||||||
ORR(reg, reg, mask);
|
ORR(reg, reg, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
ORR(tmp, tmp, reg);
|
ORR(tmp, tmp, reg);
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,25 +102,23 @@ void JitArm::subfic(UGeckoInstruction inst)
|
||||||
if (imm == 0)
|
if (imm == 0)
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
BIC(tmp, tmp, 1);
|
||||||
BIC(tmp, tmp, mask);
|
|
||||||
// Flags act exactly like subtracting from 0
|
// Flags act exactly like subtracting from 0
|
||||||
RSBS(gpr.R(d), gpr.R(d), 0);
|
RSBS(gpr.R(d), gpr.R(d), 0);
|
||||||
SetCC(CC_CS);
|
SetCC(CC_CS);
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
else if (imm == -1)
|
else if (imm == -1)
|
||||||
{
|
{
|
||||||
// CA is always set in this case
|
// CA is always set in this case
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
ORR(tmp, tmp, 1);
|
||||||
ORR(tmp, tmp, mask);
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
|
|
||||||
MVN(gpr.R(d), gpr.R(d));
|
MVN(gpr.R(d), gpr.R(d));
|
||||||
|
@ -134,35 +127,33 @@ void JitArm::subfic(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
ARMReg rA = gpr.GetReg();
|
ARMReg rA = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
|
||||||
MOVI2R(rA, imm + 1);
|
MOVI2R(rA, imm + 1);
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
BIC(tmp, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
// Flags act exactly like subtracting from 0
|
// Flags act exactly like subtracting from 0
|
||||||
MVN(gpr.R(d), gpr.R(d));
|
MVN(gpr.R(d), gpr.R(d));
|
||||||
ADDS(gpr.R(d), gpr.R(d), rA);
|
ADDS(gpr.R(d), gpr.R(d), rA);
|
||||||
// Output carry is inverted
|
// Output carry is inverted
|
||||||
SetCC(CC_CS);
|
SetCC(CC_CS);
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp, rA);
|
gpr.Unlock(tmp, rA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
|
||||||
MOVI2R(gpr.R(d), imm);
|
MOVI2R(gpr.R(d), imm);
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
BIC(tmp, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
// Flags act exactly like subtracting from 0
|
// Flags act exactly like subtracting from 0
|
||||||
SUBS(gpr.R(d), gpr.R(d), gpr.R(a));
|
SUBS(gpr.R(d), gpr.R(d), gpr.R(a));
|
||||||
// Output carry is inverted
|
// Output carry is inverted
|
||||||
SetCC(CC_CS);
|
SetCC(CC_CS);
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
// This instruction has no RC flag
|
// This instruction has no RC flag
|
||||||
|
@ -871,7 +862,6 @@ void JitArm::srawix(UGeckoInstruction inst)
|
||||||
ARMReg RA = gpr.R(a);
|
ARMReg RA = gpr.R(a);
|
||||||
ARMReg RS = gpr.R(s);
|
ARMReg RS = gpr.R(s);
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
|
||||||
|
|
||||||
MOV(tmp, RS);
|
MOV(tmp, RS);
|
||||||
ASR(RA, RS, amount);
|
ASR(RA, RS, amount);
|
||||||
|
@ -880,12 +870,12 @@ void JitArm::srawix(UGeckoInstruction inst)
|
||||||
LSL(tmp, tmp, 32 - amount);
|
LSL(tmp, tmp, 32 - amount);
|
||||||
TST(tmp, RA);
|
TST(tmp, RA);
|
||||||
|
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
BIC(tmp, tmp, mask);
|
BIC(tmp, tmp, 1);
|
||||||
SetCC(CC_NEQ);
|
SetCC(CC_NEQ);
|
||||||
ORR(tmp, tmp, mask);
|
ORR(tmp, tmp, 1);
|
||||||
SetCC();
|
SetCC();
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -895,10 +885,9 @@ void JitArm::srawix(UGeckoInstruction inst)
|
||||||
MOV(RA, RS);
|
MOV(RA, RS);
|
||||||
|
|
||||||
ARMReg tmp = gpr.GetReg();
|
ARMReg tmp = gpr.GetReg();
|
||||||
Operand2 mask = Operand2(2, 2); // XER_CA_MASK
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
BIC(tmp, tmp, 1);
|
||||||
BIC(tmp, tmp, mask);
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER]));
|
|
||||||
gpr.Unlock(tmp);
|
gpr.Unlock(tmp);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,11 +76,10 @@ void JitArm::mtspr(UGeckoInstruction inst)
|
||||||
case SPR_SRR0:
|
case SPR_SRR0:
|
||||||
case SPR_SRR1:
|
case SPR_SRR1:
|
||||||
// These are safe to do the easy way, see the bottom of this function.
|
// These are safe to do the easy way, see the bottom of this function.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SPR_LR:
|
case SPR_LR:
|
||||||
case SPR_CTR:
|
case SPR_CTR:
|
||||||
case SPR_XER:
|
|
||||||
case SPR_GQR0:
|
case SPR_GQR0:
|
||||||
case SPR_GQR0 + 1:
|
case SPR_GQR0 + 1:
|
||||||
case SPR_GQR0 + 2:
|
case SPR_GQR0 + 2:
|
||||||
|
@ -90,8 +89,23 @@ void JitArm::mtspr(UGeckoInstruction inst)
|
||||||
case SPR_GQR0 + 6:
|
case SPR_GQR0 + 6:
|
||||||
case SPR_GQR0 + 7:
|
case SPR_GQR0 + 7:
|
||||||
// These are safe to do the easy way, see the bottom of this function.
|
// These are safe to do the easy way, see the bottom of this function.
|
||||||
break;
|
break;
|
||||||
|
case SPR_XER:
|
||||||
|
{
|
||||||
|
ARMReg RD = gpr.R(inst.RD);
|
||||||
|
ARMReg tmp = gpr.GetReg();
|
||||||
|
ARMReg mask = gpr.GetReg();
|
||||||
|
MOVI2R(mask, 0xFF7F);
|
||||||
|
AND(tmp, RD, mask);
|
||||||
|
STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl));
|
||||||
|
LSR(tmp, RD, XER_CA_SHIFT);
|
||||||
|
AND(tmp, tmp, 1);
|
||||||
|
STRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
|
LSR(tmp, RD, XER_OV_SHIFT);
|
||||||
|
STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
|
||||||
|
gpr.Unlock(tmp, mask);
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
FALLBACK_IF(true);
|
FALLBACK_IF(true);
|
||||||
}
|
}
|
||||||
|
@ -116,6 +130,20 @@ void JitArm::mfspr(UGeckoInstruction inst)
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
switch (iIndex)
|
switch (iIndex)
|
||||||
{
|
{
|
||||||
|
case SPR_XER:
|
||||||
|
{
|
||||||
|
ARMReg RD = gpr.R(inst.RD);
|
||||||
|
ARMReg tmp = gpr.GetReg();
|
||||||
|
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
|
||||||
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
|
||||||
|
LSL(tmp, tmp, XER_CA_SHIFT);
|
||||||
|
ORR(RD, RD, tmp);
|
||||||
|
LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov));
|
||||||
|
LSL(tmp, tmp, XER_OV_SHIFT);
|
||||||
|
ORR(RD, RD, tmp);
|
||||||
|
gpr.Unlock(tmp);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case SPR_WPAR:
|
case SPR_WPAR:
|
||||||
case SPR_DEC:
|
case SPR_DEC:
|
||||||
case SPR_TL:
|
case SPR_TL:
|
||||||
|
|
|
@ -896,30 +896,23 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
||||||
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
|
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
|
||||||
{
|
{
|
||||||
if (oe)
|
if (oe)
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0
|
AND(8, PPCSTATE(xer_so_ov), Imm32(~XER_OV_MASK)); //XER.OV = 0
|
||||||
BTR(32, PPCSTATE(spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0
|
SHR(8, PPCSTATE(xer_ca), Imm8(1)); //carry = XER.CA, XER.CA = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitSetCA()
|
void EmuCodeBlock::JitSetCA()
|
||||||
{
|
{
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
MOV(8, PPCSTATE(xer_ca), Imm8(1)); //XER.CA = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
|
// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
|
||||||
// branchless calculation of CA is probably faster in general.
|
// branchless calculation of CA is probably faster in general.
|
||||||
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
||||||
{
|
{
|
||||||
SETcc(conditionCode, R(RSCRATCH));
|
SETcc(conditionCode, PPCSTATE(xer_ca));
|
||||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
|
|
||||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
|
|
||||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
void EmuCodeBlock::JitClearCA()
|
||||||
{
|
{
|
||||||
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
MOV(8, PPCSTATE(xer_ca), Imm8(0));
|
||||||
if (mask == 0xFFFFFFFF)
|
|
||||||
return;
|
|
||||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -121,7 +121,7 @@ public:
|
||||||
void JitGetAndClearCAOV(bool oe);
|
void JitGetAndClearCAOV(bool oe);
|
||||||
void JitSetCA();
|
void JitSetCA();
|
||||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||||
void JitClearCAOV(bool ca, bool oe);
|
void JitClearCA();
|
||||||
|
|
||||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||||
|
|
|
@ -61,6 +61,12 @@ struct GC_ALIGNED64(PowerPCState)
|
||||||
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
|
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
|
||||||
int downcount;
|
int downcount;
|
||||||
|
|
||||||
|
// XER, reformatted into byte fields for easier access.
|
||||||
|
u8 xer_ca;
|
||||||
|
u8 xer_so_ov; // format: (SO << 1) | OV
|
||||||
|
// The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support lscbx
|
||||||
|
u16 xer_stringctrl;
|
||||||
|
|
||||||
#if _M_X86_64
|
#if _M_X86_64
|
||||||
// This member exists for the purpose of an assertion in x86 JitBase.cpp
|
// This member exists for the purpose of an assertion in x86 JitBase.cpp
|
||||||
// that its offset <= 0x100. To minimize code size on x86, we want as much
|
// that its offset <= 0x100. To minimize code size on x86, we want as much
|
||||||
|
@ -252,35 +258,40 @@ inline u32 GetCR()
|
||||||
return PowerPC::CompactCR();
|
return PowerPC::CompactCR();
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetCarry/GetCarry may speed up soon.
|
|
||||||
inline void SetCarry(int ca)
|
inline void SetCarry(int ca)
|
||||||
{
|
{
|
||||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
|
PowerPC::ppcState.xer_ca = ca;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int GetCarry()
|
inline int GetCarry()
|
||||||
{
|
{
|
||||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
|
return PowerPC::ppcState.xer_ca;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline UReg_XER GetXER()
|
inline UReg_XER GetXER()
|
||||||
{
|
{
|
||||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]);
|
u32 xer = 0;
|
||||||
|
xer |= PowerPC::ppcState.xer_stringctrl;
|
||||||
|
xer |= PowerPC::ppcState.xer_ca << XER_CA_SHIFT;
|
||||||
|
xer |= PowerPC::ppcState.xer_so_ov << XER_OV_SHIFT;
|
||||||
|
return xer;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void SetXER(UReg_XER new_xer)
|
inline void SetXER(UReg_XER new_xer)
|
||||||
{
|
{
|
||||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer;
|
PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8);
|
||||||
|
PowerPC::ppcState.xer_ca = new_xer.CA;
|
||||||
|
PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int GetXER_SO()
|
inline int GetXER_SO()
|
||||||
{
|
{
|
||||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
|
return PowerPC::ppcState.xer_so_ov >> 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void SetXER_SO(int value)
|
inline void SetXER_SO(int value)
|
||||||
{
|
{
|
||||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
PowerPC::ppcState.xer_so_ov |= value << 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateFPRF(double dvalue);
|
void UpdateFPRF(double dvalue);
|
||||||
|
|
|
@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
|
||||||
static std::thread g_save_thread;
|
static std::thread g_save_thread;
|
||||||
|
|
||||||
// Don't forget to increase this after doing changes on the savestate system
|
// Don't forget to increase this after doing changes on the savestate system
|
||||||
static const u32 STATE_VERSION = 33;
|
static const u32 STATE_VERSION = 34;
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue