JIT64: optimize carry calculations
Skip emitting carry (XER.CA) calculations whose result is overwritten later in the same block before anything reads it. This is very common with srawix and similar instructions.
This commit is contained in:
parent
a40278b1c4
commit
3aa40dab00
|
@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
|
|||
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}},
|
||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
|
||||
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||
|
||||
|
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
|
|||
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||
|
||||
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
||||
|
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
|
|||
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
||||
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
||||
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}},
|
||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
|
||||
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ public:
|
|||
void GenerateConstantOverflow(bool overflow);
|
||||
void GenerateConstantOverflow(s64 val);
|
||||
void GenerateOverflow();
|
||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false);
|
||||
void ComputeRC(const Gen::OpArg & arg);
|
||||
|
||||
// use to extract bytes from a register using the regcache. offset is in bytes.
|
||||
|
|
|
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
|
|||
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||
|
||||
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
||||
|
|
|
@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
|
|||
}
|
||||
|
||||
// Assumes CA,OV are clear
|
||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||
void Jit64::FinalizeCarryOverflow(bool ca, bool oe, bool inv)
|
||||
{
|
||||
// USES_XER
|
||||
if (oe)
|
||||
|
@ -53,15 +53,17 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
|||
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
|
||||
// sides of the branch.
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
if (ca)
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(jno);
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
if (ca)
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
else
|
||||
else if (ca)
|
||||
{
|
||||
// Do carry
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
|
@ -129,10 +131,10 @@ static u32 Xor(u32 a, u32 b)
|
|||
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
||||
{
|
||||
gpr.Lock(d, a);
|
||||
carry &= js.op->wantsCA;
|
||||
if (a || binary || carry) // yeh nasty special case addic
|
||||
{
|
||||
if (carry)
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(carry, false);
|
||||
if (gpr.R(a).IsImm() && !carry)
|
||||
{
|
||||
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
|
||||
|
@ -749,34 +751,38 @@ void Jit64::subfic(UGeckoInstruction inst)
|
|||
{
|
||||
if (imm == 0)
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
// Flags act exactly like subtracting from 0
|
||||
NEG(32, gpr.R(d));
|
||||
// Output carry is inverted
|
||||
JitSetCAIf(CC_NC);
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_NC);
|
||||
}
|
||||
else if (imm == -1)
|
||||
{
|
||||
// CA is always set in this case
|
||||
JitSetCA();
|
||||
if (js.op->wantsCA)
|
||||
JitSetCA();
|
||||
NOT(32, gpr.R(d));
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
NOT(32, gpr.R(d));
|
||||
ADD(32, gpr.R(d), Imm32(imm+1));
|
||||
// Output carry is normal
|
||||
JitSetCAIf(CC_C);
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_C);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
MOV(32, gpr.R(d), Imm32(imm));
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
// Output carry is inverted
|
||||
JitSetCAIf(CC_NC);
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_NC);
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
// This instruction has no RC flag
|
||||
|
@ -789,8 +795,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
|
|||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
|
||||
JitClearCAOV(inst.OE);
|
||||
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||
if (d == b)
|
||||
{
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
|
@ -808,7 +813,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
|
|||
}
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
FinalizeCarryOverflow(inst.OE, true);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, true);
|
||||
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
@ -842,7 +847,7 @@ void Jit64::subfex(UGeckoInstruction inst)
|
|||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
FinalizeCarryOverflow(inst.OE, invertedCarry);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, invertedCarry);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
|
@ -863,7 +868,7 @@ void Jit64::subfmex(UGeckoInstruction inst)
|
|||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -884,7 +889,7 @@ void Jit64::subfzex(UGeckoInstruction inst)
|
|||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
|
@ -1375,7 +1380,7 @@ void Jit64::addex(UGeckoInstruction inst)
|
|||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1392,9 +1397,9 @@ void Jit64::addcx(UGeckoInstruction inst)
|
|||
int operand = ((d == a) ? b : a);
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, true);
|
||||
JitClearCAOV(inst.OE);
|
||||
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||
ADD(32, gpr.R(d), gpr.R(operand));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1403,10 +1408,10 @@ void Jit64::addcx(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, false);
|
||||
JitClearCAOV(inst.OE);
|
||||
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADD(32, gpr.R(d), gpr.R(b));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1426,7 +1431,7 @@ void Jit64::addmex(UGeckoInstruction inst)
|
|||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1445,7 +1450,7 @@ void Jit64::addzex(UGeckoInstruction inst)
|
|||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryOverflow(inst.OE);
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1826,16 +1831,23 @@ void Jit64::srawx(UGeckoInstruction inst)
|
|||
gpr.Lock(a, s, b);
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.BindToRegister(a, (a == s || a == b), true);
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
MOV(32, R(ECX), gpr.R(b));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
SHL(64, gpr.R(a), Imm8(32));
|
||||
SAR(64, gpr.R(a), R(ECX));
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(EAX));
|
||||
JitSetCAIf(CC_NZ);
|
||||
if (js.op->wantsCA)
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(EAX));
|
||||
JitSetCAIf(CC_NZ);
|
||||
}
|
||||
else
|
||||
{
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
if (inst.Rc)
|
||||
|
@ -1853,33 +1865,42 @@ void Jit64::srawix(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
JitClearCAOV(false);
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), R(EAX));
|
||||
// some optimized common cases that can be done in slightly fewer ops
|
||||
if (amount == 31)
|
||||
if (!js.op->wantsCA)
|
||||
{
|
||||
SAR(32, gpr.R(a), Imm8(31));
|
||||
NEG(32, R(EAX)); // EAX = input == INT_MIN ? INT_MIN : -input;
|
||||
AND(32, R(EAX), Imm32(0x80000000)); // EAX = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
||||
SHR(32, R(EAX), Imm8(31 - XER_CA_SHIFT));
|
||||
XOR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); // XER.CA = (input < 0 && input != INT_MIN)
|
||||
}
|
||||
else if (amount == 1)
|
||||
{
|
||||
SHR(32, R(EAX), Imm8(31)); // sign
|
||||
AND(32, R(EAX), gpr.R(a)); // (sign && carry)
|
||||
SAR(32, gpr.R(a), Imm8(1));
|
||||
SHL(32, R(EAX), Imm8(XER_CA_SHIFT));
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
}
|
||||
else
|
||||
{
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(EAX), Imm8(32 - amount));
|
||||
TEST(32, R(EAX), gpr.R(a));
|
||||
JitSetCAIf(CC_NZ);
|
||||
JitClearCAOV(true, false);
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), R(EAX));
|
||||
// some optimized common cases that can be done in slightly fewer ops
|
||||
if (amount == 31)
|
||||
{
|
||||
SAR(32, gpr.R(a), Imm8(31));
|
||||
NEG(32, R(EAX)); // EAX = input == INT_MIN ? INT_MIN : -input;
|
||||
AND(32, R(EAX), Imm32(0x80000000)); // EAX = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
||||
SHR(32, R(EAX), Imm8(31 - XER_CA_SHIFT));
|
||||
XOR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); // XER.CA = (input < 0 && input != INT_MIN)
|
||||
}
|
||||
else if (amount == 1)
|
||||
{
|
||||
SHR(32, R(EAX), Imm8(31)); // sign
|
||||
AND(32, R(EAX), gpr.R(a)); // (sign && carry)
|
||||
SAR(32, gpr.R(a), Imm8(1));
|
||||
SHL(32, R(EAX), Imm8(XER_CA_SHIFT));
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
}
|
||||
else
|
||||
{
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(EAX), Imm8(32 - amount));
|
||||
TEST(32, R(EAX), gpr.R(a));
|
||||
JitSetCAIf(CC_NZ);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1888,7 +1909,7 @@ void Jit64::srawix(UGeckoInstruction inst)
|
|||
FALLBACK_IF(true);
|
||||
|
||||
gpr.Lock(a, s);
|
||||
JitClearCAOV(false);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
|
||||
if (a != s)
|
||||
|
|
|
@ -1110,7 +1110,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
Jit->JitSetCA();
|
||||
FixupBranch cont = Jit->J();
|
||||
Jit->SetJumpTarget(nocarry);
|
||||
Jit->JitClearCAOV(false);
|
||||
Jit->JitClearCAOV(true, false);
|
||||
Jit->SetJumpTarget(cont);
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
|
|
@ -824,10 +824,10 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
|||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA = 1
|
||||
}
|
||||
|
||||
void EmuCodeBlock::JitClearCAOV(bool oe)
|
||||
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
||||
{
|
||||
if (oe)
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
|
||||
else
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
||||
if (mask == 0xFFFFFFFF)
|
||||
return;
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(mask));
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ public:
|
|||
void JitGetAndClearCAOV(bool oe);
|
||||
void JitSetCA();
|
||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||
void JitClearCAOV(bool oe);
|
||||
void JitClearCAOV(bool ca, bool oe);
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
|
|
|
@ -430,7 +430,6 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
|||
{
|
||||
code->wantsCR0 = false;
|
||||
code->wantsCR1 = false;
|
||||
code->wantsPS1 = false;
|
||||
|
||||
if (opinfo->flags & FL_USE_FPU)
|
||||
block->m_fpa->any = true;
|
||||
|
@ -458,6 +457,15 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
|||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||
|
||||
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
|
||||
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
|
||||
|
||||
// mfspr/mtspr can affect/use XER, so be super careful here
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
|
||||
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
|
||||
int numOut = 0;
|
||||
int numIn = 0;
|
||||
if (opinfo->flags & FL_OUT_A)
|
||||
|
@ -715,26 +723,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
|||
block->m_broken = true;
|
||||
}
|
||||
|
||||
// Scan for CR0 dependency
|
||||
// assume next block wants flags to be safe
|
||||
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
|
||||
// wants flags, to be safe.
|
||||
bool wantsCR0 = true;
|
||||
bool wantsCR1 = true;
|
||||
bool wantsPS1 = true;
|
||||
bool wantsFPRF = true;
|
||||
bool wantsCA = true;
|
||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||
{
|
||||
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
||||
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
||||
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
||||
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
||||
code[i].wantsCR0 = wantsCR0;
|
||||
code[i].wantsCR1 = wantsCR1;
|
||||
code[i].wantsPS1 = wantsPS1;
|
||||
bool opWantsCR0 = code[i].wantsCR0;
|
||||
bool opWantsCR1 = code[i].wantsCR1;
|
||||
bool opWantsFPRF = code[i].wantsFPRF;
|
||||
bool opWantsCA = code[i].wantsCA;
|
||||
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
|
||||
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
|
||||
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
|
||||
wantsCA |= opWantsCA || code[i].canEndBlock;
|
||||
code[i].wantsCR0 = wantsCR0;
|
||||
code[i].wantsCR1 = wantsCR1;
|
||||
code[i].wantsFPRF = wantsFPRF;
|
||||
wantsCR0 &= !code[i].outputCR0;
|
||||
wantsCR1 &= !code[i].outputCR1;
|
||||
wantsPS1 &= !code[i].outputPS1;
|
||||
wantsFPRF &= !code[i].outputFPRF;
|
||||
code[i].wantsCA = wantsCA;
|
||||
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
|
||||
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
|
||||
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
|
||||
wantsCA &= !code[i].outputCA || opWantsCA;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
|
|
@ -33,12 +33,12 @@ struct CodeOp //16B
|
|||
bool isBranchTarget;
|
||||
bool wantsCR0;
|
||||
bool wantsCR1;
|
||||
bool wantsPS1;
|
||||
bool wantsFPRF;
|
||||
bool wantsCA;
|
||||
bool outputCR0;
|
||||
bool outputCR1;
|
||||
bool outputPS1;
|
||||
bool outputFPRF;
|
||||
bool outputCA;
|
||||
bool canEndBlock;
|
||||
bool skip; // followed BL-s for example
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue