JIT64: optimize carry calculations further

Keep the carry flag in the x86 flags register, instead of storing it to XER, when the next instruction consumes it.
This commit is contained in:
Fiora 2014-09-07 00:37:47 -07:00
parent bea2504a51
commit 45d84605a9
8 changed files with 109 additions and 42 deletions

View File

@ -457,6 +457,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
js.skipnext = false; js.skipnext = false;
js.carryFlagSet = false;
js.carryFlagInverted = false;
js.compilerPC = nextPC; js.compilerPC = nextPC;
// Translate instructions // Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++) for (u32 i = 0; i < code_block.m_num_instructions; i++)
@ -488,6 +490,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// help peephole optimizations // help peephole optimizations
js.next_inst = ops[i + 1].inst; js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address; js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)

View File

@ -100,7 +100,9 @@ public:
void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// Use to extract bytes from a register using the regcache. offset is in bytes. // Use to extract bytes from a register using the regcache. offset is in bytes.

View File

@ -273,7 +273,7 @@ static GekkoOPTemplate table31[] =
{339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, {467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, {371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA}},
{595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},

View File

@ -44,30 +44,76 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit); SetJumpTarget(exit);
} }
// Records the carry produced by the instruction that was just emitted.
// `cond` is the x86 condition code that is true when the PowerPC carry should be 1.
//
// - If the current op's carry output is not wanted (js.op->wantsCA is false), only the
//   carry-tracking state is reset.
// - If the following instruction wants the carry in host flags (wantsCAInFlags), the
//   carry is left in the x86 carry flag and js.carryFlagSet / js.carryFlagInverted
//   describe how to read it.
// - Otherwise the carry is written out via JitSetCAIf.
void Jit64::FinalizeCarry(CCFlags cond)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;
	if (!js.op->wantsCA)
		return;

	// js.next_op is only refreshed when a following instruction exists in the block, so
	// on the last instruction it is stale (or unset) and must not be consulted. DoJit
	// also resets carryFlagSet at the start of every block, so a carry left in host
	// flags at a block end would never be picked up.
	const bool keepInFlags = !js.isLastInstruction && js.next_op->wantsCAInFlags;
	if (!keepInFlags)
	{
		JitSetCAIf(cond);
		return;
	}

	if (cond == CC_C || cond == CC_NC)
	{
		// The carry flag already holds the result; just remember its polarity.
		js.carryFlagInverted = cond == CC_NC;
	}
	else
	{
		// convert the condition to a carry flag (is there a better way?)
		// NOTE(review): x86 BT has no 8-bit operand form; confirm how the emitter
		// encodes a width of 8 here. Only bit 0 of RSCRATCH is tested.
		SETcc(cond, R(RSCRATCH));
		BT(8, R(RSCRATCH), Imm8(0));
	}
	js.carryFlagSet = true;
}
// Unconditional version: the carry value is known at JIT time.
// `ca` is the constant value the PowerPC carry takes after the current instruction.
//
// - If the current op's carry output is not wanted (js.op->wantsCA is false), only the
//   carry-tracking state is reset.
// - If the following instruction wants the carry in host flags (wantsCAInFlags), the
//   x86 carry flag is set or cleared directly and js.carryFlagSet is raised.
// - Otherwise the carry is written out via JitSetCA / JitClearCAOV.
void Jit64::FinalizeCarry(bool ca)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;
	if (!js.op->wantsCA)
		return;

	// js.next_op is only refreshed when a following instruction exists in the block, so
	// on the last instruction it is stale (or unset) and must not be consulted. DoJit
	// also resets carryFlagSet at the start of every block, so a carry left in host
	// flags at a block end would never be picked up.
	const bool keepInFlags = !js.isLastInstruction && js.next_op->wantsCAInFlags;
	if (keepInFlags)
	{
		if (ca)
			STC();
		else
			CLC();
		js.carryFlagSet = true;
	}
	else if (ca)
	{
		JitSetCA();
	}
	else
	{
		// Clear CA only; OV is left untouched.
		JitClearCAOV(true, false);
	}
}
// Assumes CA,OV are clear // Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool ca, bool oe, bool inv) void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
{ {
// USES_XER // USES_XER
if (oe) if (oe)
{ {
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both // Make sure not to lose the carry flags (not a big deal, this path is rare).
// sides of the branch. PUSHF();
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
if (ca)
JitSetCAIf(inv ? CC_NC : CC_C);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
if (ca) POPF();
JitSetCAIf(inv ? CC_NC : CC_C);
SetJumpTarget(exit);
}
else if (ca)
{
// Do carry
JitSetCAIf(inv ? CC_NC : CC_C);
} }
// Do carry
FinalizeCarry(inv ? CC_NC : CC_C);
} }
void Jit64::ComputeRC(const Gen::OpArg & arg) void Jit64::ComputeRC(const Gen::OpArg & arg)
@ -135,7 +181,6 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
if (a || binary || carry) if (a || binary || carry)
{ {
carry &= js.op->wantsCA; carry &= js.op->wantsCA;
JitClearCAOV(carry, false);
if (gpr.R(a).IsImm() && !carry) if (gpr.R(a).IsImm() && !carry)
{ {
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value)); gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
@ -159,7 +204,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
} }
if (carry) if (carry)
JitSetCAIf(CC_C); FinalizeCarry(CC_C);
if (Rc) if (Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
} }
@ -720,38 +765,31 @@ void Jit64::subfic(UGeckoInstruction inst)
{ {
if (imm == 0) if (imm == 0)
{ {
JitClearCAOV(js.op->wantsCA, false);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
// Output carry is inverted // Output carry is inverted
if (js.op->wantsCA) FinalizeCarry(CC_NC);
JitSetCAIf(CC_NC);
} }
else if (imm == -1) else if (imm == -1)
{ {
// CA is always set in this case
if (js.op->wantsCA)
JitSetCA();
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
// CA is always set in this case
FinalizeCarry(true);
} }
else else
{ {
JitClearCAOV(js.op->wantsCA, false);
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADD(32, gpr.R(d), Imm32(imm+1)); ADD(32, gpr.R(d), Imm32(imm+1));
// Output carry is normal // Output carry is normal
if (js.op->wantsCA) FinalizeCarry(CC_C);
JitSetCAIf(CC_C);
} }
} }
else else
{ {
JitClearCAOV(js.op->wantsCA, false);
MOV(32, gpr.R(d), Imm32(imm)); MOV(32, gpr.R(d), Imm32(imm));
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
// Output carry is inverted // Output carry is inverted
if (js.op->wantsCA) FinalizeCarry(CC_NC);
JitSetCAIf(CC_NC);
} }
gpr.UnlockAll(); gpr.UnlockAll();
// This instruction has no RC flag // This instruction has no RC flag
@ -1233,29 +1271,44 @@ void Jit64::arithXex(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = regsource ? inst.RB : a; int b = regsource ? inst.RB : a;
int d = inst.RD; int d = inst.RD;
bool same_input_sub = !add && regsource && a == b;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, d == a || d == b); gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
JitGetAndClearCAOV(inst.OE); if (!js.carryFlagSet)
JitGetAndClearCAOV(inst.OE);
bool invertedCarry = false; bool invertedCarry = false;
if (!add && regsource && d == b) // Special case: subfe A, B, B is a common compiler idiom
if (same_input_sub)
{ {
// Convert carry to borrow // Convert carry to borrow
CMC(); if (!js.carryFlagInverted)
CMC();
SBB(32, gpr.R(d), gpr.R(d));
invertedCarry = true;
}
else if (!add && regsource && d == b)
{
if (!js.carryFlagInverted)
CMC();
if (d != b)
MOV(32, gpr.R(d), gpr.R(b));
SBB(32, gpr.R(d), gpr.R(a)); SBB(32, gpr.R(d), gpr.R(a));
invertedCarry = true; invertedCarry = true;
} }
else else
{ {
OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0); OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
if (js.carryFlagInverted)
CMC();
if (d != a && d != b) if (d != a && d != b)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
if (!add) if (!add)
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), source); ADC(32, gpr.R(d), source);
} }
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, invertedCarry); FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1269,7 +1322,6 @@ void Jit64::arithcx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, d == a || d == b, true); gpr.BindToRegister(d, d == a || d == b, true);
JitClearCAOV(js.op->wantsCA, inst.OE);
if (d == a && d != b) if (d == a && d != b)
{ {
@ -1295,7 +1347,7 @@ void Jit64::arithcx(UGeckoInstruction inst)
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
} }
FinalizeCarryOverflow(js.op->wantsCA, inst.OE, !add); FinalizeCarryOverflow(inst.OE, !add);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1688,7 +1740,6 @@ void Jit64::srawx(UGeckoInstruction inst)
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCAOV(js.op->wantsCA, false);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
@ -1699,12 +1750,12 @@ void Jit64::srawx(UGeckoInstruction inst)
MOV(32, R(RSCRATCH), gpr.R(a)); MOV(32, R(RSCRATCH), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(RSCRATCH)); TEST(32, gpr.R(a), R(RSCRATCH));
JitSetCAIf(CC_NZ);
} }
else else
{ {
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
} }
FinalizeCarry(CC_NZ);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
if (inst.Rc) if (inst.Rc)
@ -1758,14 +1809,14 @@ void Jit64::srawix(UGeckoInstruction inst)
SAR(32, gpr.R(a), Imm8(amount)); SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount)); SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a)); TEST(32, R(RSCRATCH), gpr.R(a));
JitSetCAIf(CC_NZ); FinalizeCarry(CC_NZ);
} }
} }
} }
else else
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
JitClearCAOV(js.op->wantsCA, false); FinalizeCarry(false);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)

View File

@ -81,13 +81,16 @@ protected:
bool isLastInstruction; bool isLastInstruction;
bool memcheck; bool memcheck;
bool skipnext; bool skipnext;
bool carryFlagSet;
bool carryFlagInverted;
int fifoBytesThisBlock; int fifoBytesThisBlock;
PPCAnalyst::BlockStats st; PPCAnalyst::BlockStats st;
PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa; PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp *op; PPCAnalyst::CodeOp* op;
PPCAnalyst::CodeOp* next_op;
u8* rewriteStart; u8* rewriteStart;
JitBlock *curBlock; JitBlock *curBlock;

View File

@ -822,6 +822,7 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
SETcc(conditionCode, R(RSCRATCH)); SETcc(conditionCode, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH)); MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1 OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
} }

View File

@ -460,7 +460,13 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false; code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false; code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
// We're going to try to avoid storing carry in XER if we can avoid it -- keep it in the x86 carry flag!
// If the instruction reads CA but doesn't write it, we still need to store CA in XER; we can't
// leave it in flags.
code->wantsCAInFlags = code->wantsCA && code->outputCA && code->inst.SUBOP10 != 512;
// mfspr/mtspr can affect/use XER, so be super careful here // mfspr/mtspr can affect/use XER, so be super careful here
// we need to note specifically that mfspr needs CA in XER, not in the x86 carry flag
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER; code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr

View File

@ -35,6 +35,7 @@ struct CodeOp //16B
bool wantsCR1; bool wantsCR1;
bool wantsFPRF; bool wantsFPRF;
bool wantsCA; bool wantsCA;
bool wantsCAInFlags;
bool outputCR0; bool outputCR0;
bool outputCR1; bool outputCR1;
bool outputFPRF; bool outputFPRF;