JIT64: optimize carry calculations

Omit carry (XER.CA) calculations whose result is overwritten later in the same
block before anything reads it. This is very common with srawix and friends.
This commit is contained in:
Fiora 2014-08-21 13:56:18 -07:00
parent a40278b1c4
commit 3aa40dab00
9 changed files with 119 additions and 86 deletions

View File

@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}}, {10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}}, {11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}}, {12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}}, {13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}}, {14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}}, {15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}}, {54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}}, {339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}}, {467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}}, {371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, {512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, {595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, {659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},

View File

@ -100,7 +100,7 @@ public:
void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false);
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// use to extract bytes from a register using the regcache. offset is in bytes. // use to extract bytes from a register using the regcache. offset is in bytes.

View File

@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, {54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},

View File

@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
} }
// Assumes CA,OV are clear // Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool oe, bool inv) void Jit64::FinalizeCarryOverflow(bool ca, bool oe, bool inv)
{ {
// USES_XER // USES_XER
if (oe) if (oe)
@ -53,15 +53,17 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both // this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
// sides of the branch. // sides of the branch.
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
if (ca)
JitSetCAIf(inv ? CC_NC : CC_C); JitSetCAIf(inv ? CC_NC : CC_C);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
if (ca)
JitSetCAIf(inv ? CC_NC : CC_C); JitSetCAIf(inv ? CC_NC : CC_C);
SetJumpTarget(exit); SetJumpTarget(exit);
} }
else else if (ca)
{ {
// Do carry // Do carry
JitSetCAIf(inv ? CC_NC : CC_C); JitSetCAIf(inv ? CC_NC : CC_C);
@ -129,10 +131,10 @@ static u32 Xor(u32 a, u32 b)
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
{ {
gpr.Lock(d, a); gpr.Lock(d, a);
carry &= js.op->wantsCA;
if (a || binary || carry) // yeh nasty special case addic if (a || binary || carry) // yeh nasty special case addic
{ {
if (carry) JitClearCAOV(carry, false);
JitClearCAOV(false);
if (gpr.R(a).IsImm() && !carry) if (gpr.R(a).IsImm() && !carry)
{ {
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value)); gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
@ -749,33 +751,37 @@ void Jit64::subfic(UGeckoInstruction inst)
{ {
if (imm == 0) if (imm == 0)
{ {
JitClearCAOV(false); JitClearCAOV(js.op->wantsCA, false);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
// Output carry is inverted // Output carry is inverted
if (js.op->wantsCA)
JitSetCAIf(CC_NC); JitSetCAIf(CC_NC);
} }
else if (imm == -1) else if (imm == -1)
{ {
// CA is always set in this case // CA is always set in this case
if (js.op->wantsCA)
JitSetCA(); JitSetCA();
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
} }
else else
{ {
JitClearCAOV(false); JitClearCAOV(js.op->wantsCA, false);
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADD(32, gpr.R(d), Imm32(imm+1)); ADD(32, gpr.R(d), Imm32(imm+1));
// Output carry is normal // Output carry is normal
if (js.op->wantsCA)
JitSetCAIf(CC_C); JitSetCAIf(CC_C);
} }
} }
else else
{ {
JitClearCAOV(false); JitClearCAOV(js.op->wantsCA, false);
MOV(32, gpr.R(d), Imm32(imm)); MOV(32, gpr.R(d), Imm32(imm));
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
// Output carry is inverted // Output carry is inverted
if (js.op->wantsCA)
JitSetCAIf(CC_NC); JitSetCAIf(CC_NC);
} }
gpr.UnlockAll(); gpr.UnlockAll();
@ -789,8 +795,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
JitClearCAOV(js.op->wantsCA, inst.OE);
JitClearCAOV(inst.OE);
if (d == b) if (d == b)
{ {
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
@ -808,7 +813,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
} }
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
FinalizeCarryOverflow(inst.OE, true); FinalizeCarryOverflow(js.op->wantsCA, inst.OE, true);
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -842,7 +847,7 @@ void Jit64::subfex(UGeckoInstruction inst)
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
} }
FinalizeCarryOverflow(inst.OE, invertedCarry); FinalizeCarryOverflow(js.op->wantsCA, inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -863,7 +868,7 @@ void Jit64::subfmex(UGeckoInstruction inst)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -884,7 +889,7 @@ void Jit64::subfzex(UGeckoInstruction inst)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -1375,7 +1380,7 @@ void Jit64::addex(UGeckoInstruction inst)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
} }
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1392,9 +1397,9 @@ void Jit64::addcx(UGeckoInstruction inst)
int operand = ((d == a) ? b : a); int operand = ((d == a) ? b : a);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, true);
JitClearCAOV(inst.OE); JitClearCAOV(js.op->wantsCA, inst.OE);
ADD(32, gpr.R(d), gpr.R(operand)); ADD(32, gpr.R(d), gpr.R(operand));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1403,10 +1408,10 @@ void Jit64::addcx(UGeckoInstruction inst)
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
JitClearCAOV(inst.OE); JitClearCAOV(js.op->wantsCA, inst.OE);
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b)); ADD(32, gpr.R(d), gpr.R(b));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1426,7 +1431,7 @@ void Jit64::addmex(UGeckoInstruction inst)
if (d != a) if (d != a)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1445,7 +1450,7 @@ void Jit64::addzex(UGeckoInstruction inst)
if (d != a) if (d != a)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1826,16 +1831,23 @@ void Jit64::srawx(UGeckoInstruction inst)
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCAOV(false); JitClearCAOV(js.op->wantsCA, false);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
SHL(64, gpr.R(a), Imm8(32)); SHL(64, gpr.R(a), Imm8(32));
SAR(64, gpr.R(a), R(ECX)); SAR(64, gpr.R(a), R(ECX));
if (js.op->wantsCA)
{
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(EAX)); TEST(32, gpr.R(a), R(EAX));
JitSetCAIf(CC_NZ); JitSetCAIf(CC_NZ);
}
else
{
SHR(64, gpr.R(a), Imm8(32));
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
if (inst.Rc) if (inst.Rc)
@ -1853,7 +1865,15 @@ void Jit64::srawix(UGeckoInstruction inst)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
JitClearCAOV(false); if (!js.op->wantsCA)
{
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));
SAR(32, gpr.R(a), Imm8(amount));
}
else
{
JitClearCAOV(true, false);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
if (a != s) if (a != s)
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
@ -1882,13 +1902,14 @@ void Jit64::srawix(UGeckoInstruction inst)
JitSetCAIf(CC_NZ); JitSetCAIf(CC_NZ);
} }
} }
}
else else
{ {
// FIXME // FIXME
FALLBACK_IF(true); FALLBACK_IF(true);
gpr.Lock(a, s); gpr.Lock(a, s);
JitClearCAOV(false); JitClearCAOV(js.op->wantsCA, false);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)

View File

@ -1110,7 +1110,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA(); Jit->JitSetCA();
FixupBranch cont = Jit->J(); FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry); Jit->SetJumpTarget(nocarry);
Jit->JitClearCAOV(false); Jit->JitClearCAOV(true, false);
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;

View File

@ -824,10 +824,10 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA = 1 OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA = 1
} }
void EmuCodeBlock::JitClearCAOV(bool oe) void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
{ {
if (oe) u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 if (mask == 0xFFFFFFFF)
else return;
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(mask));
} }

View File

@ -53,7 +53,7 @@ public:
void JitGetAndClearCAOV(bool oe); void JitGetAndClearCAOV(bool oe);
void JitSetCA(); void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode); void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool oe); void JitClearCAOV(bool ca, bool oe);
void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm);

View File

@ -430,7 +430,6 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
{ {
code->wantsCR0 = false; code->wantsCR0 = false;
code->wantsCR1 = false; code->wantsCR1 = false;
code->wantsPS1 = false;
if (opinfo->flags & FL_USE_FPU) if (opinfo->flags & FL_USE_FPU)
block->m_fpa->any = true; block->m_fpa->any = true;
@ -458,6 +457,15 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false; code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
// mfspr/mtspr can affect/use XER, so be super careful here
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
int numOut = 0; int numOut = 0;
int numIn = 0; int numIn = 0;
if (opinfo->flags & FL_OUT_A) if (opinfo->flags & FL_OUT_A)
@ -715,26 +723,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
block->m_broken = true; block->m_broken = true;
} }
// Scan for CR0 dependency // Scan for flag dependencies; assume the next block (or any branch that can leave the block)
// assume next block wants flags to be safe // wants flags, to be safe.
bool wantsCR0 = true; bool wantsCR0 = true;
bool wantsCR1 = true; bool wantsCR1 = true;
bool wantsPS1 = true;
bool wantsFPRF = true; bool wantsFPRF = true;
bool wantsCA = true;
for (int i = block->m_num_instructions - 1; i >= 0; i--) for (int i = block->m_num_instructions - 1; i >= 0; i--)
{ {
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock; bool opWantsCR0 = code[i].wantsCR0;
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock; bool opWantsCR1 = code[i].wantsCR1;
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock; bool opWantsFPRF = code[i].wantsFPRF;
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock; bool opWantsCA = code[i].wantsCA;
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
wantsCA |= opWantsCA || code[i].canEndBlock;
code[i].wantsCR0 = wantsCR0; code[i].wantsCR0 = wantsCR0;
code[i].wantsCR1 = wantsCR1; code[i].wantsCR1 = wantsCR1;
code[i].wantsPS1 = wantsPS1;
code[i].wantsFPRF = wantsFPRF; code[i].wantsFPRF = wantsFPRF;
wantsCR0 &= !code[i].outputCR0; code[i].wantsCA = wantsCA;
wantsCR1 &= !code[i].outputCR1; wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
wantsPS1 &= !code[i].outputPS1; wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
wantsFPRF &= !code[i].outputFPRF; wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
wantsCA &= !code[i].outputCA || opWantsCA;
} }
return address; return address;
} }

View File

@ -33,12 +33,12 @@ struct CodeOp //16B
bool isBranchTarget; bool isBranchTarget;
bool wantsCR0; bool wantsCR0;
bool wantsCR1; bool wantsCR1;
bool wantsPS1;
bool wantsFPRF; bool wantsFPRF;
bool wantsCA;
bool outputCR0; bool outputCR0;
bool outputCR1; bool outputCR1;
bool outputPS1;
bool outputFPRF; bool outputFPRF;
bool outputCA;
bool canEndBlock; bool canEndBlock;
bool skip; // followed BL-s for example bool skip; // followed BL-s for example
}; };