JIT64: Optimize carry handling

Carries are rather common and unpredictable, so do them branchlessly wherever
we can.
This commit is contained in:
Fiora 2014-08-24 11:35:57 -07:00
parent ee24d4714a
commit 805be80f12
6 changed files with 84 additions and 211 deletions

View File

@ -331,9 +331,12 @@ union UFPR
float f[2]; float f[2];
}; };
#define XER_CA_MASK 0x20000000 #define XER_CA_SHIFT 29
#define XER_OV_MASK 0x40000000 #define XER_OV_SHIFT 30
#define XER_SO_MASK 0x80000000 #define XER_SO_SHIFT 31
#define XER_CA_MASK (1U << XER_CA_SHIFT)
#define XER_OV_MASK (1U << XER_OV_SHIFT)
#define XER_SO_MASK (1U << XER_SO_SHIFT)
// XER // XER
union UReg_XER union UReg_XER
{ {

View File

@ -101,10 +101,6 @@ public:
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void GetCarryEAXAndClear();
void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false);
void GenerateCarry();
void GenerateRC();
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// use to extract bytes from a register using the regcache. offset is in bytes. // use to extract bytes from a register using the regcache. offset is in bytes.

View File

@ -31,6 +31,7 @@ void Jit64::GenerateConstantOverflow(bool overflow)
} }
} }
// We could do overflow branchlessly, but unlike carry it seems to be quite a bit rarer.
void Jit64::GenerateOverflow() void Jit64::GenerateOverflow()
{ {
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
@ -49,82 +50,24 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
// USES_XER // USES_XER
if (oe) if (oe)
{ {
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
// sides of the branch.
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
// Do carry JitSetCAIf(inv ? CC_NC : CC_C);
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
JitSetCA();
SetJumpTarget(carry1);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
// Do carry JitSetCAIf(inv ? CC_NC : CC_C);
FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC);
JitSetCA();
SetJumpTarget(carry2);
SetJumpTarget(exit); SetJumpTarget(exit);
} }
else else
{ {
// Do carry // Do carry
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); JitSetCAIf(inv ? CC_NC : CC_C);
JitSetCA();
SetJumpTarget(carry1);
} }
} }
// Emits code that copies XER into EAX and extracts the carry bit from that copy.
// BTR on bit 29 (the XER CA bit) leaves the previous CA value in the x86 carry
// flag and clears the bit in EAX. XER in memory is not written by this helper.
void Jit64::GetCarryEAXAndClear()
{
// EAX = XER
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
// x86 carry = CA bit of EAX; CA bit of EAX = 0
BTR(32, R(EAX), Imm8(29));
}
// Assumes that XER is in EAX and that the CA bit is clear.
// Emits code that folds the x86 carry (and, when oe is set, overflow) flag of the
// preceding arithmetic instruction into the XER copy held in EAX, then stores EAX
// back to XER.
//   oe  - when true, also update OV/SO from the x86 overflow flag.
//   inv - when true, CA is set when the x86 carry flag is clear instead of set.
void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
{
// USES_XER
if (oe)
{
// The overflow flag is tested first; the carry handling is then repeated on
// both sides of that branch.
FixupBranch jno = J_CC(CC_NO);
// Overflow path: do carry (skip the OR when the selected carry condition is false)
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK));
SetJumpTarget(carry1);
//XER[OV/SO] = 1
OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J();
SetJumpTarget(jno);
// No-overflow path: do carry
FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK));
SetJumpTarget(carry2);
//XER[OV] = 0 (SO is left untouched)
AND(32, R(EAX), Imm32(~XER_OV_MASK));
SetJumpTarget(exit);
}
else
{
// Carry only: OV/SO are not modified.
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK));
SetJumpTarget(carry1);
}
// Dump EAX back into XER
MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
}
// Assumes that the flags were just set through an addition.
// Emits a branch on the x86 carry flag that writes XER.CA directly in memory:
// CA is set when the carry flag is set and cleared otherwise.
void Jit64::GenerateCarry()
{
// USES_XER
FixupBranch pNoCarry = J_CC(CC_NC);
// Carry set: XER.CA = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK));
FixupBranch pContinue = J();
SetJumpTarget(pNoCarry);
// Carry clear: XER.CA = 0
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK)));
SetJumpTarget(pContinue);
}
void Jit64::ComputeRC(const Gen::OpArg & arg) void Jit64::ComputeRC(const Gen::OpArg & arg)
{ {
if (arg.IsImm()) if (arg.IsImm())
@ -153,7 +96,7 @@ OpArg Jit64::ExtractFromReg(int reg, int offset)
// we can't do this optimization in the emitter because MOVZX and AND have different effects on flags. // we can't do this optimization in the emitter because MOVZX and AND have different effects on flags.
void Jit64::AndWithMask(X64Reg reg, u32 mask) void Jit64::AndWithMask(X64Reg reg, u32 mask)
{ {
if (mask == 0xff) if (mask == 0xff)
MOVZX(32, 8, reg, R(reg)); MOVZX(32, 8, reg, R(reg));
else if (mask == 0xffff) else if (mask == 0xffff)
@ -188,22 +131,16 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
gpr.Lock(d, a); gpr.Lock(d, a);
if (a || binary || carry) // yeh nasty special case addic if (a || binary || carry) // yeh nasty special case addic
{ {
if (carry)
JitClearCAOV(false);
if (gpr.R(a).IsImm() && !carry) if (gpr.R(a).IsImm() && !carry)
{ {
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value)); gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
if (Rc)
{
ComputeRC(gpr.R(d));
}
} }
else if (a == d) else if (a == d)
{ {
gpr.KillImmediate(d, true, true); gpr.KillImmediate(d, true, true);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry)
GenerateCarry();
if (Rc)
ComputeRC(gpr.R(d));
} }
else else
{ {
@ -217,12 +154,12 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
} }
}
if (carry) if (carry)
GenerateCarry(); JitSetCAIf(CC_C);
if (Rc) if (Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
} }
}
else if (doop == Add) else if (doop == Add)
{ {
// a == 0, which for these instructions imply value = 0 // a == 0, which for these instructions imply value = 0
@ -849,13 +786,11 @@ void Jit64::subfic(UGeckoInstruction inst)
{ {
if (imm == 0) if (imm == 0)
{ {
JitClearCA(); JitClearCAOV(false);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
// Output carry is inverted // Output carry is inverted
FixupBranch carry1 = J_CC(CC_C); JitSetCAIf(CC_NC);
JitSetCA();
SetJumpTarget(carry1);
} }
else if (imm == -1) else if (imm == -1)
{ {
@ -865,24 +800,20 @@ void Jit64::subfic(UGeckoInstruction inst)
} }
else else
{ {
JitClearCA(); JitClearCAOV(false);
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADD(32, gpr.R(d), Imm32(imm+1)); ADD(32, gpr.R(d), Imm32(imm+1));
// Output carry is normal // Output carry is normal
FixupBranch carry1 = J_CC(CC_NC); JitSetCAIf(CC_C);
JitSetCA();
SetJumpTarget(carry1);
} }
} }
else else
{ {
JitClearCA(); JitClearCAOV(false);
MOV(32, gpr.R(d), Imm32(imm)); MOV(32, gpr.R(d), Imm32(imm));
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
// Output carry is inverted // Output carry is inverted
FixupBranch carry1 = J_CC(CC_C); JitSetCAIf(CC_NC);
JitSetCA();
SetJumpTarget(carry1);
} }
gpr.UnlockAll(); gpr.UnlockAll();
// This instruction has no RC flag // This instruction has no RC flag
@ -927,7 +858,7 @@ void Jit64::subfex(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
GetCarryEAXAndClear(); JitGetAndClearCAOV(inst.OE);
bool invertedCarry = false; bool invertedCarry = false;
if (d == b) if (d == b)
@ -948,7 +879,7 @@ void Jit64::subfex(UGeckoInstruction inst)
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
} }
FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -964,14 +895,12 @@ void Jit64::subfmex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
GetCarryEAXAndClear(); JitGetAndClearCAOV(inst.OE);
if (d != a) if (d != a)
{
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
}
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -987,14 +916,12 @@ void Jit64::subfzex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
GetCarryEAXAndClear(); JitGetAndClearCAOV(inst.OE);
if (d != a) if (d != a)
{
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
}
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -1012,14 +939,10 @@ void Jit64::subfx(UGeckoInstruction inst)
s32 i = (s32)gpr.R(b).offset, j = (s32)gpr.R(a).offset; s32 i = (s32)gpr.R(b).offset, j = (s32)gpr.R(a).offset;
gpr.SetImmediate32(d, i - j); gpr.SetImmediate32(d, i - j);
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
}
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow((s64)i - (s64)j); GenerateConstantOverflow((s64)i - (s64)j);
} }
}
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
@ -1477,31 +1400,22 @@ void Jit64::addex(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a) || (d == b));
JitGetAndClearCAOV(inst.OE);
if ((d == a) || (d == b)) if ((d == a) || (d == b))
{ {
gpr.Lock(a, b, d);
gpr.BindToRegister(d, true);
GetCarryEAXAndClear();
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
else else
{ {
gpr.Lock(a, b, d);
gpr.BindToRegister(d, false);
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
FinalizeCarryGenerateOverflowEAX(inst.OE); }
FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
}
} }
void Jit64::addcx(UGeckoInstruction inst) void Jit64::addcx(UGeckoInstruction inst)
@ -1543,31 +1457,16 @@ void Jit64::addmex(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
if (d == a)
{
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, d == a);
JitGetAndClearCAOV(inst.OE);
GetCarryEAXAndClear(); if (d != a)
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
{
gpr.Lock(a, d);
gpr.BindToRegister(d, false);
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
}
} }
void Jit64::addzex(UGeckoInstruction inst) void Jit64::addzex(UGeckoInstruction inst)
@ -1577,31 +1476,16 @@ void Jit64::addzex(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
if (d == a)
{
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, d == a);
JitGetAndClearCAOV(inst.OE);
GetCarryEAXAndClear(); if (d != a)
ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
else
{
gpr.Lock(a, d);
gpr.BindToRegister(d, false);
GetCarryEAXAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
}
} }
void Jit64::rlwinmx(UGeckoInstruction inst) void Jit64::rlwinmx(UGeckoInstruction inst)
@ -1792,8 +1676,8 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
{ {
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.BindToRegister(a, (a == b || a == s), true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
gpr.BindToRegister(a, (a == s), true);
if (a != s) if (a != s)
{ {
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
@ -1900,9 +1784,7 @@ void Jit64::slwx(UGeckoInstruction inst)
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)
{
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
}
SHL(64, gpr.R(a), R(ECX)); SHL(64, gpr.R(a), R(ECX));
if (inst.Rc) if (inst.Rc)
{ {
@ -1929,7 +1811,7 @@ void Jit64::srawx(UGeckoInstruction inst)
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCA(); JitClearCAOV(false);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
@ -1938,16 +1820,11 @@ void Jit64::srawx(UGeckoInstruction inst)
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(EAX)); TEST(32, gpr.R(a), R(EAX));
FixupBranch nocarry = J_CC(CC_Z); JitSetCAIf(CC_NZ);
JitSetCA();
SetJumpTarget(nocarry);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
}
} }
void Jit64::srawix(UGeckoInstruction inst) void Jit64::srawix(UGeckoInstruction inst)
@ -1961,21 +1838,14 @@ void Jit64::srawix(UGeckoInstruction inst)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
JitClearCA(); JitClearCAOV(false);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
if (a != s) if (a != s)
{
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
}
SAR(32, gpr.R(a), Imm8(amount)); SAR(32, gpr.R(a), Imm8(amount));
if (inst.Rc) SHL(32, R(EAX), Imm8(32 - amount));
ComputeRC(gpr.R(a));
SHL(32, R(EAX), Imm8(32-amount));
TEST(32, R(EAX), gpr.R(a)); TEST(32, R(EAX), gpr.R(a));
FixupBranch nocarry = J_CC(CC_Z); JitSetCAIf(CC_NZ);
JitSetCA();
SetJumpTarget(nocarry);
gpr.UnlockAll();
} }
else else
{ {
@ -1983,20 +1853,15 @@ void Jit64::srawix(UGeckoInstruction inst)
FALLBACK_IF(true); FALLBACK_IF(true);
gpr.Lock(a, s); gpr.Lock(a, s);
JitClearCA(); JitClearCAOV(false);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)
{
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
} }
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
}
gpr.UnlockAll(); gpr.UnlockAll();
}
} }
// count leading zeroes // count leading zeroes
@ -2032,10 +1897,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
} }
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
// TODO: Check PPC manual too
}
} }
void Jit64::twx(UGeckoInstruction inst) void Jit64::twx(UGeckoInstruction inst)

View File

@ -1110,7 +1110,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA(); Jit->JitSetCA();
FixupBranch cont = Jit->J(); FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry); Jit->SetJumpTarget(nocarry);
Jit->JitClearCA(); Jit->JitClearCAOV(false);
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;

View File

@ -802,10 +802,11 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
OR(32, M(&FPSCR), R(EAX)); OR(32, M(&FPSCR), R(EAX));
} }
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
void EmuCodeBlock::JitClearCA()
{ {
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 if (oe)
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0
BTR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0
} }
void EmuCodeBlock::JitSetCA() void EmuCodeBlock::JitSetCA()
@ -813,6 +814,16 @@ void EmuCodeBlock::JitSetCA()
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
} }
// Some testing shows CA is set roughly 1/3 of the time (relative to clears), so
// branchless calculation of CA is probably faster in general.
//
// Emits code that ORs the given x86 condition, as a 0/1 bit, into XER.CA.
// Because the bit is only ORed in, XER.CA must already be clear for the result
// to equal the condition. Clobbers EAX; the SHL also changes the x86 flags, so
// the condition cannot be tested again afterwards.
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
{
// AL = condition ? 1 : 0
SETcc(conditionCode, R(EAX));
// Zero-extend AL so the upper bits of EAX are clear
MOVZX(32, 8, EAX, R(AL));
// Move the bit to the CA position
SHL(32, R(EAX), Imm8(XER_CA_SHIFT));
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA |= condition
}
void EmuCodeBlock::JitClearCAOV(bool oe) void EmuCodeBlock::JitClearCAOV(bool oe)
{ {
if (oe) if (oe)

View File

@ -50,8 +50,9 @@ public:
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
void JitClearCA(); void JitGetAndClearCAOV(bool oe);
void JitSetCA(); void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool oe); void JitClearCAOV(bool oe);
void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionS(Gen::X64Reg xmm);