JIT64: optimize carry calculations further
Keep carry flags in the x86 flags register if used in the next instruction.
This commit is contained in:
parent
bea2504a51
commit
45d84605a9
|
@ -457,6 +457,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
||||
|
||||
js.skipnext = false;
|
||||
js.carryFlagSet = false;
|
||||
js.carryFlagInverted = false;
|
||||
js.compilerPC = nextPC;
|
||||
// Translate instructions
|
||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||
|
@ -488,6 +490,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
// help peephole optimizations
|
||||
js.next_inst = ops[i + 1].inst;
|
||||
js.next_compilerPC = ops[i + 1].address;
|
||||
js.next_op = &ops[i + 1];
|
||||
}
|
||||
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||
|
|
|
@ -100,7 +100,9 @@ public:
|
|||
void GenerateConstantOverflow(bool overflow);
|
||||
void GenerateConstantOverflow(s64 val);
|
||||
void GenerateOverflow();
|
||||
void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false);
|
||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void FinalizeCarry(Gen::CCFlags cond);
|
||||
void FinalizeCarry(bool ca);
|
||||
void ComputeRC(const Gen::OpArg & arg);
|
||||
|
||||
// Use to extract bytes from a register using the regcache. offset is in bytes.
|
||||
|
|
|
@ -273,7 +273,7 @@ static GekkoOPTemplate table31[] =
|
|||
{339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
|
||||
{467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
|
||||
{371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
|
||||
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}},
|
||||
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA}},
|
||||
{595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
|
||||
{659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
|
||||
|
||||
|
|
|
@ -44,30 +44,76 @@ void Jit64::GenerateOverflow()
|
|||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// Records the carry produced by the instruction that was just emitted.
//
// cond is the x86 condition code under which the guest carry (XER.CA) must end up set.
// Nothing is emitted unless js.op->wantsCA is true. If the following instruction wants its
// carry input in the host flags (wantsCAInFlags), the carry is left in (or converted into)
// the x86 carry flag and js.carryFlagSet / js.carryFlagInverted describe it to that
// instruction; otherwise the carry is written out through JitSetCAIf.
void Jit64::FinalizeCarry(CCFlags cond)
{
	// Default: the next instruction must not assume anything is parked in the host flags.
	js.carryFlagSet = false;
	js.carryFlagInverted = false;

	if (!js.op->wantsCA)
		return;

	// NOTE(review): js.next_op appears to be assigned only in DoJit's look-ahead path, i.e.
	// when another instruction follows in the block - confirm. On the last instruction it
	// would be stale or unset, and no later instruction in this block could consume a carry
	// left in the host flags, so always store the carry in that case.
	const bool keepInFlags = !js.isLastInstruction && js.next_op->wantsCAInFlags;
	if (!keepInFlags)
	{
		JitSetCAIf(cond);
		return;
	}

	if (cond == CC_C || cond == CC_NC)
	{
		// The host carry flag already holds the carry (CC_C) or its complement (CC_NC);
		// just record the polarity for the consumer.
		js.carryFlagInverted = cond == CC_NC;
	}
	else
	{
		// Convert an arbitrary condition into the carry flag: materialise it as 0/1 in
		// RSCRATCH, then bit-test bit 0, which copies that bit into CF.
		// NOTE(review): x86 has no 8-bit BT encoding - confirm the emitter does what is
		// intended for bits == 8.
		SETcc(cond, R(RSCRATCH));
		BT(8, R(RSCRATCH), Imm8(0));
	}
	js.carryFlagSet = true;
}
|
||||
|
||||
// Unconditional version
|
||||
void Jit64::FinalizeCarry(bool ca)
|
||||
{
|
||||
js.carryFlagSet = false;
|
||||
js.carryFlagInverted = false;
|
||||
if (js.op->wantsCA)
|
||||
{
|
||||
if (js.next_op->wantsCAInFlags)
|
||||
{
|
||||
if (ca)
|
||||
STC();
|
||||
else
|
||||
CLC();
|
||||
js.carryFlagSet = true;
|
||||
}
|
||||
else if (ca)
|
||||
{
|
||||
JitSetCA();
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assumes CA,OV are clear
|
||||
void Jit64::FinalizeCarryOverflow(bool ca, bool oe, bool inv)
|
||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||
{
|
||||
// USES_XER
|
||||
if (oe)
|
||||
{
|
||||
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both
|
||||
// sides of the branch.
|
||||
// Make sure not to lose the carry flags (not a big deal, this path is rare).
|
||||
PUSHF();
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
if (ca)
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(jno);
|
||||
if (ca)
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
else if (ca)
|
||||
{
|
||||
// Do carry
|
||||
JitSetCAIf(inv ? CC_NC : CC_C);
|
||||
POPF();
|
||||
}
|
||||
// Do carry
|
||||
FinalizeCarry(inv ? CC_NC : CC_C);
|
||||
}
|
||||
|
||||
void Jit64::ComputeRC(const Gen::OpArg & arg)
|
||||
|
@ -135,7 +181,6 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
|
|||
if (a || binary || carry)
|
||||
{
|
||||
carry &= js.op->wantsCA;
|
||||
JitClearCAOV(carry, false);
|
||||
if (gpr.R(a).IsImm() && !carry)
|
||||
{
|
||||
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
|
||||
|
@ -159,7 +204,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
|
|||
}
|
||||
}
|
||||
if (carry)
|
||||
JitSetCAIf(CC_C);
|
||||
FinalizeCarry(CC_C);
|
||||
if (Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
}
|
||||
|
@ -720,38 +765,31 @@ void Jit64::subfic(UGeckoInstruction inst)
|
|||
{
|
||||
if (imm == 0)
|
||||
{
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
// Flags act exactly like subtracting from 0
|
||||
NEG(32, gpr.R(d));
|
||||
// Output carry is inverted
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_NC);
|
||||
FinalizeCarry(CC_NC);
|
||||
}
|
||||
else if (imm == -1)
|
||||
{
|
||||
// CA is always set in this case
|
||||
if (js.op->wantsCA)
|
||||
JitSetCA();
|
||||
NOT(32, gpr.R(d));
|
||||
// CA is always set in this case
|
||||
FinalizeCarry(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
NOT(32, gpr.R(d));
|
||||
ADD(32, gpr.R(d), Imm32(imm+1));
|
||||
// Output carry is normal
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_C);
|
||||
FinalizeCarry(CC_C);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
MOV(32, gpr.R(d), Imm32(imm));
|
||||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
// Output carry is inverted
|
||||
if (js.op->wantsCA)
|
||||
JitSetCAIf(CC_NC);
|
||||
FinalizeCarry(CC_NC);
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
// This instruction has no RC flag
|
||||
|
@ -1233,29 +1271,44 @@ void Jit64::arithXex(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = regsource ? inst.RB : a;
|
||||
int d = inst.RD;
|
||||
bool same_input_sub = !add && regsource && a == b;
|
||||
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, d == a || d == b);
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
|
||||
if (!js.carryFlagSet)
|
||||
JitGetAndClearCAOV(inst.OE);
|
||||
|
||||
bool invertedCarry = false;
|
||||
if (!add && regsource && d == b)
|
||||
// Special case: subfe A, B, B is a common compiler idiom
|
||||
if (same_input_sub)
|
||||
{
|
||||
// Convert carry to borrow
|
||||
CMC();
|
||||
if (!js.carryFlagInverted)
|
||||
CMC();
|
||||
SBB(32, gpr.R(d), gpr.R(d));
|
||||
invertedCarry = true;
|
||||
}
|
||||
else if (!add && regsource && d == b)
|
||||
{
|
||||
if (!js.carryFlagInverted)
|
||||
CMC();
|
||||
if (d != b)
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SBB(32, gpr.R(d), gpr.R(a));
|
||||
invertedCarry = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
|
||||
if (js.carryFlagInverted)
|
||||
CMC();
|
||||
if (d != a && d != b)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
if (!add)
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), source);
|
||||
}
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, invertedCarry);
|
||||
FinalizeCarryOverflow(inst.OE, invertedCarry);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1269,7 +1322,6 @@ void Jit64::arithcx(UGeckoInstruction inst)
|
|||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, d == a || d == b, true);
|
||||
JitClearCAOV(js.op->wantsCA, inst.OE);
|
||||
|
||||
if (d == a && d != b)
|
||||
{
|
||||
|
@ -1295,7 +1347,7 @@ void Jit64::arithcx(UGeckoInstruction inst)
|
|||
SUB(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
|
||||
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, !add);
|
||||
FinalizeCarryOverflow(inst.OE, !add);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1688,7 +1740,6 @@ void Jit64::srawx(UGeckoInstruction inst)
|
|||
gpr.FlushLockX(ECX);
|
||||
gpr.Lock(a, s, b);
|
||||
gpr.BindToRegister(a, (a == s || a == b), true);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
MOV(32, R(ECX), gpr.R(b));
|
||||
if (a != s)
|
||||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
|
@ -1699,12 +1750,12 @@ void Jit64::srawx(UGeckoInstruction inst)
|
|||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(RSCRATCH));
|
||||
JitSetCAIf(CC_NZ);
|
||||
}
|
||||
else
|
||||
{
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
}
|
||||
FinalizeCarry(CC_NZ);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
if (inst.Rc)
|
||||
|
@ -1758,14 +1809,14 @@ void Jit64::srawix(UGeckoInstruction inst)
|
|||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||
JitSetCAIf(CC_NZ);
|
||||
FinalizeCarry(CC_NZ);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.Lock(a, s);
|
||||
JitClearCAOV(js.op->wantsCA, false);
|
||||
FinalizeCarry(false);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
|
||||
if (a != s)
|
||||
|
|
|
@ -81,13 +81,16 @@ protected:
|
|||
bool isLastInstruction;
|
||||
bool memcheck;
|
||||
bool skipnext;
|
||||
bool carryFlagSet;
|
||||
bool carryFlagInverted;
|
||||
|
||||
int fifoBytesThisBlock;
|
||||
|
||||
PPCAnalyst::BlockStats st;
|
||||
PPCAnalyst::BlockRegStats gpa;
|
||||
PPCAnalyst::BlockRegStats fpa;
|
||||
PPCAnalyst::CodeOp *op;
|
||||
PPCAnalyst::CodeOp* op;
|
||||
PPCAnalyst::CodeOp* next_op;
|
||||
u8* rewriteStart;
|
||||
|
||||
JitBlock *curBlock;
|
||||
|
|
|
@ -822,6 +822,7 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
|||
SETcc(conditionCode, R(RSCRATCH));
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
|
||||
}
|
||||
|
||||
|
|
|
@ -460,7 +460,13 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
|||
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
|
||||
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
|
||||
|
||||
// We're going to try to avoid storing carry in XER if we can avoid it -- keep it in the x86 carry flag!
|
||||
// If the instruction reads CA but doesn't write it, we still need to store CA in XER; we can't
|
||||
// leave it in flags.
|
||||
code->wantsCAInFlags = code->wantsCA && code->outputCA && code->inst.SUBOP10 != 512;
|
||||
|
||||
// mfspr/mtspr can affect/use XER, so be super careful here
|
||||
// we need to note specifically that mfspr needs CA in XER, not in the x86 carry flag
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
|
||||
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||
|
|
|
@ -35,6 +35,7 @@ struct CodeOp //16B
|
|||
bool wantsCR1;
|
||||
bool wantsFPRF;
|
||||
bool wantsCA;
|
||||
bool wantsCAInFlags;
|
||||
bool outputCR0;
|
||||
bool outputCR1;
|
||||
bool outputFPRF;
|
||||
|
|
Loading…
Reference in New Issue