JIT64: optimize carry calculations
Skip computing the carry flag (XER[CA]) when a later instruction in the same block overwrites it before anything reads it. This is very common with srawix and related instructions such as srawx.
This commit is contained in:
parent
a40278b1c4
commit
3aa40dab00
|
@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
|
||||||
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||||
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||||
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
||||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}},
|
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
|
||||||
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||||
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||||
|
|
||||||
|
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
|
||||||
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
|
|
||||||
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
||||||
|
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
|
||||||
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
||||||
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
||||||
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
||||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}},
|
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
|
||||||
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||||
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,7 @@ public:
|
||||||
void GenerateConstantOverflow(bool overflow);
|
void GenerateConstantOverflow(bool overflow);
|
||||||
void GenerateConstantOverflow(s64 val);
|
void GenerateConstantOverflow(s64 val);
|
||||||
void GenerateOverflow();
|
void GenerateOverflow();
|
||||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false);
|
||||||
void ComputeRC(const Gen::OpArg & arg);
|
void ComputeRC(const Gen::OpArg & arg);
|
||||||
|
|
||||||
// use to extract bytes from a register using the regcache. offset is in bytes.
|
// use to extract bytes from a register using the regcache. offset is in bytes.
|
||||||
|
|
|
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
|
||||||
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||||
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||||
|
|
||||||
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
||||||
|
|
|
@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assumes CA,OV are clear
|
// Assumes CA,OV are clear
|
||||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
void Jit64::FinalizeCarryOverflow(bool ca, bool oe, bool inv)
|
||||||
{
|
{
|
||||||
// USES_XER
|
// USES_XER
|
||||||
if (oe)
|
if (oe)
|
||||||
|
@ -53,15 +53,17 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||||
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
|
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
|
||||||
// sides of the branch.
|
// sides of the branch.
|
||||||
FixupBranch jno = J_CC(CC_NO);
|
FixupBranch jno = J_CC(CC_NO);
|
||||||
|
if (ca)
|
||||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||||
//XER[OV/SO] = 1
|
//XER[OV/SO] = 1
|
||||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||||
FixupBranch exit = J();
|
FixupBranch exit = J();
|
||||||
SetJumpTarget(jno);
|
SetJumpTarget(jno);
|
||||||
|
if (ca)
|
||||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||||
SetJumpTarget(exit);
|
SetJumpTarget(exit);
|
||||||
}
|
}
|
||||||
else
|
else if (ca)
|
||||||
{
|
{
|
||||||
// Do carry
|
// Do carry
|
||||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||||
|
@ -129,10 +131,10 @@ static u32 Xor(u32 a, u32 b)
|
||||||
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
||||||
{
|
{
|
||||||
gpr.Lock(d, a);
|
gpr.Lock(d, a);
|
||||||
|
carry &= js.op->wantsCA;
|
||||||
if (a || binary || carry) // yeh nasty special case addic
|
if (a || binary || carry) // yeh nasty special case addic
|
||||||
{
|
{
|
||||||
if (carry)
|
JitClearCAOV(carry, false);
|
||||||
JitClearCAOV(false);
|
|
||||||
if (gpr.R(a).IsImm() && !carry)
|
if (gpr.R(a).IsImm() && !carry)
|
||||||
{
|
{
|
||||||
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
|
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
|
||||||
|
@ -749,33 +751,37 @@ void Jit64::subfic(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (imm == 0)
|
if (imm == 0)
|
||||||
{
|
{
|
||||||
JitClearCAOV(false);
|
JitClearCAOV(js.op->wantsCA, false);
|
||||||
// Flags act exactly like subtracting from 0
|
// Flags act exactly like subtracting from 0
|
||||||
NEG(32, gpr.R(d));
|
NEG(32, gpr.R(d));
|
||||||
// Output carry is inverted
|
// Output carry is inverted
|
||||||
|
if (js.op->wantsCA)
|
||||||
JitSetCAIf(CC_NC);
|
JitSetCAIf(CC_NC);
|
||||||
}
|
}
|
||||||
else if (imm == -1)
|
else if (imm == -1)
|
||||||
{
|
{
|
||||||
// CA is always set in this case
|
// CA is always set in this case
|
||||||
|
if (js.op->wantsCA)
|
||||||
JitSetCA();
|
JitSetCA();
|
||||||
NOT(32, gpr.R(d));
|
NOT(32, gpr.R(d));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
JitClearCAOV(false);
|
JitClearCAOV(js.op->wantsCA, false);
|
||||||
NOT(32, gpr.R(d));
|
NOT(32, gpr.R(d));
|
||||||
ADD(32, gpr.R(d), Imm32(imm+1));
|
ADD(32, gpr.R(d), Imm32(imm+1));
|
||||||
// Output carry is normal
|
// Output carry is normal
|
||||||
|
if (js.op->wantsCA)
|
||||||
JitSetCAIf(CC_C);
|
JitSetCAIf(CC_C);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
JitClearCAOV(false);
|
JitClearCAOV(js.op->wantsCA, false);
|
||||||
MOV(32, gpr.R(d), Imm32(imm));
|
MOV(32, gpr.R(d), Imm32(imm));
|
||||||
SUB(32, gpr.R(d), gpr.R(a));
|
SUB(32, gpr.R(d), gpr.R(a));
|
||||||
// Output carry is inverted
|
// Output carry is inverted
|
||||||
|
if (js.op->wantsCA)
|
||||||
JitSetCAIf(CC_NC);
|
JitSetCAIf(CC_NC);
|
||||||
}
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -789,8 +795,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
|
||||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||||
|
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||||
JitClearCAOV(inst.OE);
|
|
||||||
if (d == b)
|
if (d == b)
|
||||||
{
|
{
|
||||||
SUB(32, gpr.R(d), gpr.R(a));
|
SUB(32, gpr.R(d), gpr.R(a));
|
||||||
|
@ -808,7 +813,7 @@ void Jit64::subfcx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
FinalizeCarryOverflow(inst.OE, true);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, true);
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -842,7 +847,7 @@ void Jit64::subfex(UGeckoInstruction inst)
|
||||||
NOT(32, gpr.R(d));
|
NOT(32, gpr.R(d));
|
||||||
ADC(32, gpr.R(d), gpr.R(b));
|
ADC(32, gpr.R(d), gpr.R(b));
|
||||||
}
|
}
|
||||||
FinalizeCarryOverflow(inst.OE, invertedCarry);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, invertedCarry);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
|
|
||||||
|
@ -863,7 +868,7 @@ void Jit64::subfmex(UGeckoInstruction inst)
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
NOT(32, gpr.R(d));
|
NOT(32, gpr.R(d));
|
||||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -884,7 +889,7 @@ void Jit64::subfzex(UGeckoInstruction inst)
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
NOT(32, gpr.R(d));
|
NOT(32, gpr.R(d));
|
||||||
ADC(32, gpr.R(d), Imm8(0));
|
ADC(32, gpr.R(d), Imm8(0));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
|
|
||||||
|
@ -1375,7 +1380,7 @@ void Jit64::addex(UGeckoInstruction inst)
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
ADC(32, gpr.R(d), gpr.R(b));
|
ADC(32, gpr.R(d), gpr.R(b));
|
||||||
}
|
}
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -1392,9 +1397,9 @@ void Jit64::addcx(UGeckoInstruction inst)
|
||||||
int operand = ((d == a) ? b : a);
|
int operand = ((d == a) ? b : a);
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.BindToRegister(d, true);
|
gpr.BindToRegister(d, true);
|
||||||
JitClearCAOV(inst.OE);
|
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||||
ADD(32, gpr.R(d), gpr.R(operand));
|
ADD(32, gpr.R(d), gpr.R(operand));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -1403,10 +1408,10 @@ void Jit64::addcx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.BindToRegister(d, false);
|
gpr.BindToRegister(d, false);
|
||||||
JitClearCAOV(inst.OE);
|
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
ADD(32, gpr.R(d), gpr.R(b));
|
ADD(32, gpr.R(d), gpr.R(b));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -1426,7 +1431,7 @@ void Jit64::addmex(UGeckoInstruction inst)
|
||||||
if (d != a)
|
if (d != a)
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -1445,7 +1450,7 @@ void Jit64::addzex(UGeckoInstruction inst)
|
||||||
if (d != a)
|
if (d != a)
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
ADC(32, gpr.R(d), Imm8(0));
|
ADC(32, gpr.R(d), Imm8(0));
|
||||||
FinalizeCarryOverflow(inst.OE);
|
FinalizeCarryOverflow(js.op->wantsCA, inst.OE);
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC(gpr.R(d));
|
ComputeRC(gpr.R(d));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -1826,16 +1831,23 @@ void Jit64::srawx(UGeckoInstruction inst)
|
||||||
gpr.Lock(a, s, b);
|
gpr.Lock(a, s, b);
|
||||||
gpr.FlushLockX(ECX);
|
gpr.FlushLockX(ECX);
|
||||||
gpr.BindToRegister(a, (a == s || a == b), true);
|
gpr.BindToRegister(a, (a == s || a == b), true);
|
||||||
JitClearCAOV(false);
|
JitClearCAOV(js.op->wantsCA, false);
|
||||||
MOV(32, R(ECX), gpr.R(b));
|
MOV(32, R(ECX), gpr.R(b));
|
||||||
if (a != s)
|
if (a != s)
|
||||||
MOV(32, gpr.R(a), gpr.R(s));
|
MOV(32, gpr.R(a), gpr.R(s));
|
||||||
SHL(64, gpr.R(a), Imm8(32));
|
SHL(64, gpr.R(a), Imm8(32));
|
||||||
SAR(64, gpr.R(a), R(ECX));
|
SAR(64, gpr.R(a), R(ECX));
|
||||||
|
if (js.op->wantsCA)
|
||||||
|
{
|
||||||
MOV(32, R(EAX), gpr.R(a));
|
MOV(32, R(EAX), gpr.R(a));
|
||||||
SHR(64, gpr.R(a), Imm8(32));
|
SHR(64, gpr.R(a), Imm8(32));
|
||||||
TEST(32, gpr.R(a), R(EAX));
|
TEST(32, gpr.R(a), R(EAX));
|
||||||
JitSetCAIf(CC_NZ);
|
JitSetCAIf(CC_NZ);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SHR(64, gpr.R(a), Imm8(32));
|
||||||
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
|
@ -1853,7 +1865,15 @@ void Jit64::srawix(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
gpr.Lock(a, s);
|
gpr.Lock(a, s);
|
||||||
gpr.BindToRegister(a, a == s, true);
|
gpr.BindToRegister(a, a == s, true);
|
||||||
JitClearCAOV(false);
|
if (!js.op->wantsCA)
|
||||||
|
{
|
||||||
|
if (a != s)
|
||||||
|
MOV(32, gpr.R(a), gpr.R(s));
|
||||||
|
SAR(32, gpr.R(a), Imm8(amount));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
JitClearCAOV(true, false);
|
||||||
MOV(32, R(EAX), gpr.R(s));
|
MOV(32, R(EAX), gpr.R(s));
|
||||||
if (a != s)
|
if (a != s)
|
||||||
MOV(32, gpr.R(a), R(EAX));
|
MOV(32, gpr.R(a), R(EAX));
|
||||||
|
@ -1882,13 +1902,14 @@ void Jit64::srawix(UGeckoInstruction inst)
|
||||||
JitSetCAIf(CC_NZ);
|
JitSetCAIf(CC_NZ);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// FIXME
|
// FIXME
|
||||||
FALLBACK_IF(true);
|
FALLBACK_IF(true);
|
||||||
|
|
||||||
gpr.Lock(a, s);
|
gpr.Lock(a, s);
|
||||||
JitClearCAOV(false);
|
JitClearCAOV(js.op->wantsCA, false);
|
||||||
gpr.BindToRegister(a, a == s, true);
|
gpr.BindToRegister(a, a == s, true);
|
||||||
|
|
||||||
if (a != s)
|
if (a != s)
|
||||||
|
|
|
@ -1110,7 +1110,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||||
Jit->JitSetCA();
|
Jit->JitSetCA();
|
||||||
FixupBranch cont = Jit->J();
|
FixupBranch cont = Jit->J();
|
||||||
Jit->SetJumpTarget(nocarry);
|
Jit->SetJumpTarget(nocarry);
|
||||||
Jit->JitClearCAOV(false);
|
Jit->JitClearCAOV(true, false);
|
||||||
Jit->SetJumpTarget(cont);
|
Jit->SetJumpTarget(cont);
|
||||||
regNormalRegClear(RI, I);
|
regNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -824,10 +824,10 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
||||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA = 1
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitClearCAOV(bool oe)
|
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
||||||
{
|
{
|
||||||
if (oe)
|
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
|
if (mask == 0xFFFFFFFF)
|
||||||
else
|
return;
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(mask));
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,7 @@ public:
|
||||||
void JitGetAndClearCAOV(bool oe);
|
void JitGetAndClearCAOV(bool oe);
|
||||||
void JitSetCA();
|
void JitSetCA();
|
||||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||||
void JitClearCAOV(bool oe);
|
void JitClearCAOV(bool ca, bool oe);
|
||||||
|
|
||||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||||
|
|
|
@ -430,7 +430,6 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||||
{
|
{
|
||||||
code->wantsCR0 = false;
|
code->wantsCR0 = false;
|
||||||
code->wantsCR1 = false;
|
code->wantsCR1 = false;
|
||||||
code->wantsPS1 = false;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_USE_FPU)
|
if (opinfo->flags & FL_USE_FPU)
|
||||||
block->m_fpa->any = true;
|
block->m_fpa->any = true;
|
||||||
|
@ -458,6 +457,15 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||||
|
|
||||||
|
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
|
||||||
|
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
|
||||||
|
|
||||||
|
// mfspr/mtspr can affect/use XER, so be super careful here
|
||||||
|
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
|
||||||
|
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||||
|
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||||
|
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||||
|
|
||||||
int numOut = 0;
|
int numOut = 0;
|
||||||
int numIn = 0;
|
int numIn = 0;
|
||||||
if (opinfo->flags & FL_OUT_A)
|
if (opinfo->flags & FL_OUT_A)
|
||||||
|
@ -715,26 +723,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
block->m_broken = true;
|
block->m_broken = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan for CR0 dependency
|
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
|
||||||
// assume next block wants flags to be safe
|
// wants flags, to be safe.
|
||||||
bool wantsCR0 = true;
|
bool wantsCR0 = true;
|
||||||
bool wantsCR1 = true;
|
bool wantsCR1 = true;
|
||||||
bool wantsPS1 = true;
|
|
||||||
bool wantsFPRF = true;
|
bool wantsFPRF = true;
|
||||||
|
bool wantsCA = true;
|
||||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
bool opWantsCR0 = code[i].wantsCR0;
|
||||||
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
bool opWantsCR1 = code[i].wantsCR1;
|
||||||
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
bool opWantsFPRF = code[i].wantsFPRF;
|
||||||
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
bool opWantsCA = code[i].wantsCA;
|
||||||
|
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
|
||||||
|
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
|
||||||
|
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
|
||||||
|
wantsCA |= opWantsCA || code[i].canEndBlock;
|
||||||
code[i].wantsCR0 = wantsCR0;
|
code[i].wantsCR0 = wantsCR0;
|
||||||
code[i].wantsCR1 = wantsCR1;
|
code[i].wantsCR1 = wantsCR1;
|
||||||
code[i].wantsPS1 = wantsPS1;
|
|
||||||
code[i].wantsFPRF = wantsFPRF;
|
code[i].wantsFPRF = wantsFPRF;
|
||||||
wantsCR0 &= !code[i].outputCR0;
|
code[i].wantsCA = wantsCA;
|
||||||
wantsCR1 &= !code[i].outputCR1;
|
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
|
||||||
wantsPS1 &= !code[i].outputPS1;
|
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
|
||||||
wantsFPRF &= !code[i].outputFPRF;
|
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
|
||||||
|
wantsCA &= !code[i].outputCA || opWantsCA;
|
||||||
}
|
}
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,12 +33,12 @@ struct CodeOp //16B
|
||||||
bool isBranchTarget;
|
bool isBranchTarget;
|
||||||
bool wantsCR0;
|
bool wantsCR0;
|
||||||
bool wantsCR1;
|
bool wantsCR1;
|
||||||
bool wantsPS1;
|
|
||||||
bool wantsFPRF;
|
bool wantsFPRF;
|
||||||
|
bool wantsCA;
|
||||||
bool outputCR0;
|
bool outputCR0;
|
||||||
bool outputCR1;
|
bool outputCR1;
|
||||||
bool outputPS1;
|
|
||||||
bool outputFPRF;
|
bool outputFPRF;
|
||||||
|
bool outputCA;
|
||||||
bool canEndBlock;
|
bool canEndBlock;
|
||||||
bool skip; // followed BL-s for example
|
bool skip; // followed BL-s for example
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue