JIT: make instruction merging generic

This should make it easier to merge sequences of more than two instructions.
Also correct some minor inconsistencies in behavior between instruction
merging cases.
This commit is contained in:
Fiora 2015-01-03 22:59:28 -08:00
parent 074f246c69
commit e8cfcd3aeb
12 changed files with 112 additions and 115 deletions

View File

@ -522,6 +522,7 @@ void Jit64::Jit(u32 em_address)
jo.enableBlocklink = false; jo.enableBlocklink = false;
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE); analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
} }
Trace(); Trace();
@ -603,7 +604,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
js.skipnext = false; js.skipInstructions = 0;
js.carryFlagSet = false; js.carryFlagSet = false;
js.carryFlagInverted = false; js.carryFlagInverted = false;
js.assumeNoPairedQuantize = false; js.assumeNoPairedQuantize = false;
@ -651,12 +652,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (i == (code_block.m_num_instructions - 1)) if (i == (code_block.m_num_instructions - 1))
{ {
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
js.next_inst_bp = false;
if (Profiler::g_ProfileBlocks) if (Profiler::g_ProfileBlocks)
{ {
// WARNING - cmp->branch merging will screw this up.
PROFILER_VPUSH; PROFILER_VPUSH;
// get end tic // get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
@ -664,14 +662,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
PROFILER_UPDATE_TIME(b); PROFILER_UPDATE_TIME(b);
PROFILER_VPOP; PROFILER_VPOP;
} }
} js.isLastInstruction = true;
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
js.next_inst_bp = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i + 1].address);
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
@ -856,11 +847,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str()); //NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
} }
#endif #endif
if (js.skipnext) i += js.skipInstructions;
{ js.skipInstructions = 0;
js.skipnext = false;
i++; // Skip next instruction
}
} }
u32 function = HLE::GetFunctionIndex(js.blockStart); u32 function = HLE::GetFunctionIndex(js.blockStart);
@ -919,5 +907,6 @@ void Jit64::EnableOptimization()
{ {
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE); analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
} }

View File

@ -115,6 +115,7 @@ public:
void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
bool MergeAllowedNextInstructions(int count);
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca); void FinalizeCarry(bool ca);

View File

@ -346,10 +346,12 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT }; int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };
// Merge neighboring fcmp and cror (the primary use of cror). // Merge neighboring fcmp and cror (the primary use of cror).
UGeckoInstruction next = js.next_inst; UGeckoInstruction next = js.op[1].inst;
if (next.OPCD == 19 && next.SUBOP10 == 449 && (next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf) if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE) &&
MergeAllowedNextInstructions(1) && next.OPCD == 19 && next.SUBOP10 == 449 &&
(next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
{ {
js.skipnext = true; js.skipInstructions = 1;
js.downcountAmount++; js.downcountAmount++;
int dst = 3 - (next.CRBD & 3); int dst = 3 - (next.CRBD & 3);
output[3 - (next.CRBD & 3)] &= ~(1 << dst); output[3 - (next.CRBD & 3)] &= ~(1 << dst);

View File

@ -50,14 +50,30 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit); SetJumpTarget(exit);
} }
bool Jit64::MergeAllowedNextInstructions(int count)
{
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
return false;
// Be careful: a breakpoint kills flags in between instructions
for (int i = 1; i <= count; i++)
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
return false;
if (js.op[i].isBranchTarget)
return false;
}
return true;
}
void Jit64::FinalizeCarry(CCFlags cond) void Jit64::FinalizeCarry(CCFlags cond)
{ {
js.carryFlagSet = false; js.carryFlagSet = false;
js.carryFlagInverted = false; js.carryFlagInverted = false;
if (js.op->wantsCA) if (js.op->wantsCA)
{ {
// Be careful: a breakpoint kills flags in between instructions // Not actually merging instructions, but the effect is equivalent (we can't have breakpoints/etc in between).
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp) if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{ {
if (cond == CC_C || cond == CC_NC) if (cond == CC_C || cond == CC_NC)
{ {
@ -86,7 +102,7 @@ void Jit64::FinalizeCarry(bool ca)
js.carryFlagInverted = false; js.carryFlagInverted = false;
if (js.op->wantsCA) if (js.op->wantsCA)
{ {
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp) if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{ {
if (ca) if (ca)
STC(); STC();
@ -331,7 +347,10 @@ bool Jit64::CheckMergedBranch(int crf)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false; return false;
const UGeckoInstruction& next = js.next_inst; if (!MergeAllowedNextInstructions(1))
return false;
const UGeckoInstruction& next = js.op[1].inst;
return (((next.OPCD == 16 /* bcx */) || return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) || ((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) && ((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
@ -343,33 +362,35 @@ bool Jit64::CheckMergedBranch(int crf)
void Jit64::DoMergedBranch() void Jit64::DoMergedBranch()
{ {
// Code that handles successful PPC branching. // Code that handles successful PPC branching.
if (js.next_inst.OPCD == 16) // bcx const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
if (next.OPCD == 16) // bcx
{ {
if (js.next_inst.LK) if (next.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, M(&LR), Imm32(nextPC + 4));
u32 destination; u32 destination;
if (js.next_inst.AA) if (next.AA)
destination = SignExt16(js.next_inst.BD << 2); destination = SignExt16(next.BD << 2);
else else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); destination = nextPC + SignExt16(next.BD << 2);
WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4); WriteExit(destination, next.LK, nextPC + 4);
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
{ {
if (js.next_inst.LK) if (next.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, M(&LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), M(&CTR)); MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4); WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
{ {
MOV(32, R(RSCRATCH), M(&LR)); MOV(32, R(RSCRATCH), M(&LR));
if (!m_enable_blr_optimization) if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK) if (next.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, M(&LR), Imm32(nextPC + 4));
WriteBLRExit(); WriteBLRExit();
} }
else else
@ -381,9 +402,11 @@ void Jit64::DoMergedBranch()
void Jit64::DoMergedBranchCondition() void Jit64::DoMergedBranchCondition()
{ {
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipInstructions = 1;
int test_bit = 8 >> (js.next_inst.BI & 3); const UGeckoInstruction& next = js.op[1].inst;
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE); int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -408,16 +431,18 @@ void Jit64::DoMergedBranchCondition()
{ {
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
WriteExit(js.next_compilerPC + 4); WriteExit(nextPC + 4);
} }
} }
void Jit64::DoMergedBranchImmediate(s64 val) void Jit64::DoMergedBranchImmediate(s64 val)
{ {
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipInstructions = 1;
int test_bit = 8 >> (js.next_inst.BI & 3); const UGeckoInstruction& next = js.op[1].inst;
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE); int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -441,7 +466,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{ {
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
WriteExit(js.next_compilerPC + 4); WriteExit(nextPC + 4);
} }
} }

View File

@ -95,16 +95,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
} }
// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it. // PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
if (accessSize == 8 && js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 954 && if (MergeAllowedNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && js.op[1].inst.SUBOP10 == 954 &&
js.next_inst.RS == inst.RD && js.next_inst.RA == inst.RD && !js.next_inst.Rc) js.op[1].inst.RS == inst.RD && js.op[1].inst.RA == inst.RD && !js.op[1].inst.Rc)
{
if (PowerPC::GetState() != PowerPC::CPU_STEPPING)
{ {
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipInstructions = 1;
signExtend = true; signExtend = true;
} }
}
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
// Will give nice boost to dual core mode // Will give nice boost to dual core mode

View File

@ -282,16 +282,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
ADD(64, R(RAX), R(RDX)); ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
if (MergeAllowedNextInstructions(1))
{
const UGeckoInstruction& next = js.op[1].inst;
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can. // if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F); u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339) // Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = js.next_inst.RD; int n = next.RD;
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
PowerPC::GetState() != PowerPC::CPU_STEPPING && n != d)
{ {
js.downcountAmount++; js.downcountAmount++;
js.skipnext = true; js.skipInstructions = 1;
gpr.Lock(d, n); gpr.Lock(d, n);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false); gpr.BindToRegister(n, false);
@ -304,16 +306,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
MOV(32, gpr.R(d), R(RAX)); MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU) if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX)); MOV(32, gpr.R(n), R(RAX));
break;
}
} }
else
{
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
if (iIndex == SPR_TU) if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32)); SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX)); MOV(32, gpr.R(d), R(RAX));
}
gpr.UnlockAllX();
break; break;
} }
case SPR_XER: case SPR_XER:
@ -341,6 +341,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex])); MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
break; break;
} }
gpr.UnlockAllX();
gpr.UnlockAll(); gpr.UnlockAll();
} }

View File

@ -610,16 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += opinfo->numCycles; js.downcountAmount += opinfo->numCycles;
if (i == (code_block.m_num_instructions - 1)) if (i == (code_block.m_num_instructions - 1))
{
js.isLastInstruction = true; js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}
u32 function = HLE::GetFunctionIndex(ops[i].address); u32 function = HLE::GetFunctionIndex(ops[i].address);
if (function != 0) if (function != 0)

View File

@ -443,7 +443,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
js.skipnext = false; js.skipInstructions = 0;
js.compilerPC = nextPC; js.compilerPC = nextPC;
// Translate instructions // Translate instructions
@ -459,13 +459,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
{ {
// WARNING - cmp->branch merging will screw this up. // WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true; js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)

View File

@ -232,7 +232,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.blockStart = em_address; js.blockStart = em_address;
js.fifoBytesThisBlock = 0; js.fifoBytesThisBlock = 0;
js.downcountAmount = 0; js.downcountAmount = 0;
js.skipnext = false; js.skipInstructions = 0;
js.curBlock = b; js.curBlock = b;
u32 nextPC = em_address; u32 nextPC = em_address;
@ -281,13 +281,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
{ {
// WARNING - cmp->branch merging will screw this up. // WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true; js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)

View File

@ -65,9 +65,7 @@ protected:
struct JitState struct JitState
{ {
u32 compilerPC; u32 compilerPC;
u32 next_compilerPC;
u32 blockStart; u32 blockStart;
UGeckoInstruction next_inst; // for easy peephole opt.
int instructionNumber; int instructionNumber;
int instructionsLeft; int instructionsLeft;
int downcountAmount; int downcountAmount;
@ -88,10 +86,9 @@ protected:
bool firstFPInstructionFound; bool firstFPInstructionFound;
bool isLastInstruction; bool isLastInstruction;
bool memcheck; bool memcheck;
bool skipnext; int skipInstructions;
bool carryFlagSet; bool carryFlagSet;
bool carryFlagInverted; bool carryFlagInverted;
bool next_inst_bp;
int fifoBytesThisBlock; int fifoBytesThisBlock;
@ -99,7 +96,6 @@ protected:
PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa; PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp* op; PPCAnalyst::CodeOp* op;
PPCAnalyst::CodeOp* next_op;
u8* rewriteStart; u8* rewriteStart;
JitBlock *curBlock; JitBlock *curBlock;

View File

@ -219,6 +219,11 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
const GekkoOPInfo *b_info = b.opinfo; const GekkoOPInfo *b_info = b.opinfo;
int a_flags = a_info->flags; int a_flags = a_info->flags;
int b_flags = b_info->flags; int b_flags = b_info->flags;
// can't reorder around breakpoints
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
(PowerPC::breakpoints.IsAddressBreakPoint(a.address) || PowerPC::breakpoints.IsAddressBreakPoint(b.address)))
return false;
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE)) if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
return false; return false;
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc)) if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
@ -462,6 +467,7 @@ void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
// Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more // Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
// picky about this, but cror seems to almost solely be used for this purpose in real code. // picky about this, but cror seems to almost solely be used for this purpose in real code.
// Additionally, the other boolean ops seem to almost never be used. // Additionally, the other boolean ops seem to almost never be used.
if (HasOption(OPTION_CROR_MERGE))
ReorderInstructionsCore(instructions, code, true, REORDER_CROR); ReorderInstructionsCore(instructions, code, true, REORDER_CROR);
// For carry, bubble instructions *towards* each other; one direction often isn't enough // For carry, bubble instructions *towards* each other; one direction often isn't enough
// to get pairs like addc/adde next to each other. // to get pairs like addc/adde next to each other.

View File

@ -214,6 +214,9 @@ public:
// Reorder carry instructions next to their associated branches and pass // Reorder carry instructions next to their associated branches and pass
// carry flags in the x86 flags between them, instead of in XER. // carry flags in the x86 flags between them, instead of in XER.
OPTION_CARRY_MERGE = (1 << 5), OPTION_CARRY_MERGE = (1 << 5),
// Reorder cror instructions next to their associated fcmp.
OPTION_CROR_MERGE = (1 << 6),
}; };