Merge pull request #1834 from FioraAeterna/genericinstmerging
JIT: Generic instruction merging
This commit is contained in: commit 90c6ebfd86
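In short: the diff below replaces the old one-instruction peephole state (js.next_inst, js.next_op, js.skipnext) with a generic gate, Jit64::MergeAllowedNextInstructions(), a js.skipInstructions counter, and two new analyzer options (OPTION_CROR_MERGE, OPTION_CARRY_MERGE) that control the cror and carry reordering passes. The sketch below is a condensed illustration of that pattern, not an excerpt from the commit; the types are stand-ins for Dolphin's JitState/CodeOp and the opcode test is only a placeholder.

#include <cstdint>

struct DecodedOp { uint32_t opcd; bool isBranchTarget; };

struct MergeState
{
	const DecodedOp* op;   // current op; op[1] is the next op in the block
	int instructionsLeft;
	int downcountAmount;
	int skipInstructions;  // replaces the old one-shot js.skipnext flag
};

// Same shape as the diff's MergeAllowedNextInstructions(): the next 'count'
// ops must still be inside the block and must not be branch targets (the real
// version also rejects single-stepping and breakpoints).
static bool MergeAllowed(const MergeState& js, int count)
{
	if (js.instructionsLeft < count)
		return false;
	for (int i = 1; i <= count; i++)
		if (js.op[i].isBranchTarget)
			return false;
	return true;
}

static void CompileOpWithPeephole(MergeState& js)
{
	// Every merge site in the diff follows this pattern: gate on the check,
	// peek at js.op[1], then charge its cycles and skip emitting it separately.
	if (MergeAllowed(js, 1) && js.op[1].opcd == 19 /* placeholder opcode test */)
	{
		js.downcountAmount++;
		js.skipInstructions = 1;
		// ...emit fused code for op[0] and op[1] here...
	}
}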
@@ -522,6 +522,7 @@ void Jit64::Jit(u32 em_address)
jo.enableBlocklink = false;
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
Trace();
@@ -603,7 +604,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);

js.skipnext = false;
js.skipInstructions = 0;
js.carryFlagSet = false;
js.carryFlagInverted = false;
js.assumeNoPairedQuantize = false;
@@ -651,12 +652,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc

if (i == (code_block.m_num_instructions - 1))
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
js.next_inst_bp = false;
if (Profiler::g_ProfileBlocks)
{
// WARNING - cmp->branch merging will screw this up.
PROFILER_VPUSH;
// get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
@@ -664,14 +662,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
PROFILER_UPDATE_TIME(b);
PROFILER_VPOP;
}
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
js.next_inst_bp = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i + 1].address);
js.isLastInstruction = true;
}

if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
@@ -856,11 +847,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
}
#endif
if (js.skipnext)
{
js.skipnext = false;
i++; // Skip next instruction
}
i += js.skipInstructions;
js.skipInstructions = 0;
}

u32 function = HLE::GetFunctionIndex(js.blockStart);
@@ -919,5 +907,6 @@ void Jit64::EnableOptimization()
{
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
}
@@ -115,6 +115,7 @@ public:
void GenerateConstantOverflow(bool overflow);
void GenerateConstantOverflow(s64 val);
void GenerateOverflow();
bool MergeAllowedNextInstructions(int count);
void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
@@ -346,10 +346,12 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };

// Merge neighboring fcmp and cror (the primary use of cror).
UGeckoInstruction next = js.next_inst;
if (next.OPCD == 19 && next.SUBOP10 == 449 && (next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
UGeckoInstruction next = js.op[1].inst;
if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE) &&
MergeAllowedNextInstructions(1) && next.OPCD == 19 && next.SUBOP10 == 449 &&
(next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
{
js.skipnext = true;
js.skipInstructions = 1;
js.downcountAmount++;
int dst = 3 - (next.CRBD & 3);
output[3 - (next.CRBD & 3)] &= ~(1 << dst);
@@ -50,14 +50,30 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit);
}

bool Jit64::MergeAllowedNextInstructions(int count)
{
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
return false;
// Be careful: a breakpoint kills flags in between instructions
for (int i = 1; i <= count; i++)
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
return false;
if (js.op[i].isBranchTarget)
return false;
}
return true;
}

void Jit64::FinalizeCarry(CCFlags cond)
{
js.carryFlagSet = false;
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
// Be careful: a breakpoint kills flags in between instructions
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
// Not actually merging instructions, but the effect is equivalent (we can't have breakpoints/etc in between).
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (cond == CC_C || cond == CC_NC)
{
@@ -86,7 +102,7 @@ void Jit64::FinalizeCarry(bool ca)
js.carryFlagInverted = false;
if (js.op->wantsCA)
{
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
{
if (ca)
STC();
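The two FinalizeCarry() hunks above are where OPTION_CARRY_MERGE pays off: if the immediately following op wants CA delivered in the host flags and MergeAllowedNextInstructions() guarantees nothing can run in between, the carry stays live in the x86 carry flag (so a carry-producing add followed by a carry-consuming add can map onto the host's ADD/ADC pair) instead of being spilled to XER[CA] and reloaded. A minimal model of that decision, with stand-in fields rather than Dolphin's real CodeOp:

struct OpCarryInfo { bool wantsCA; bool wantsCAInFlags; };

// Condensed from the diff's FinalizeCarry(): keep the host carry flag live only
// if something downstream reads CA at all and the very next op wants it in
// flags, with no breakpoint or branch target allowed in between (mergeAllowed).
bool KeepCarryInHostFlags(bool mergeAllowed, const OpCarryInfo& current, const OpCarryInfo& next)
{
	if (!current.wantsCA)
		return false;  // CA is dead here, nothing to preserve
	return mergeAllowed && next.wantsCAInFlags;
}

Whether such pairs actually end up adjacent is handled by the ReorderInstructions() change further down, which bubbles carry instructions toward each other.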
@@ -331,7 +347,10 @@ bool Jit64::CheckMergedBranch(int crf)
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false;

const UGeckoInstruction& next = js.next_inst;
if (!MergeAllowedNextInstructions(1))
return false;

const UGeckoInstruction& next = js.op[1].inst;
return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
@@ -343,33 +362,35 @@ bool Jit64::CheckMergedBranch(int crf)
void Jit64::DoMergedBranch()
{
// Code that handles successful PPC branching.
if (js.next_inst.OPCD == 16) // bcx
const UGeckoInstruction& next = js.op[1].inst;
const u32 nextPC = js.op[1].address;
if (next.OPCD == 16) // bcx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));

u32 destination;
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
if (next.AA)
destination = SignExt16(next.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
destination = nextPC + SignExt16(next.BD << 2);
WriteExit(destination, next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), M(&CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
{
MOV(32, R(RSCRATCH), M(&LR));
if (!m_enable_blr_optimization)
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
if (next.LK)
MOV(32, M(&LR), Imm32(nextPC + 4));
WriteBLRExit();
}
else
@@ -381,9 +402,11 @@ void Jit64::DoMergedBranch()
void Jit64::DoMergedBranchCondition()
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;

gpr.UnlockAll();
gpr.UnlockAllX();
@@ -408,16 +431,18 @@ void Jit64::DoMergedBranchCondition()
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}

void Jit64::DoMergedBranchImmediate(s64 val)
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
js.skipInstructions = 1;
const UGeckoInstruction& next = js.op[1].inst;
int test_bit = 8 >> (next.BI & 3);
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
const u32 nextPC = js.op[1].address;

gpr.UnlockAll();
gpr.UnlockAllX();
@@ -441,7 +466,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
WriteExit(nextPC + 4);
}
}
@@ -95,15 +95,12 @@ void Jit64::lXXx(UGeckoInstruction inst)
}

// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
if (accessSize == 8 && js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 954 &&
js.next_inst.RS == inst.RD && js.next_inst.RA == inst.RD && !js.next_inst.Rc)
if (MergeAllowedNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && js.op[1].inst.SUBOP10 == 954 &&
js.op[1].inst.RS == inst.RD && js.op[1].inst.RA == inst.RD && !js.op[1].inst.Rc)
{
if (PowerPC::GetState() != PowerPC::CPU_STEPPING)
{
js.downcountAmount++;
js.skipnext = true;
signExtend = true;
}
js.downcountAmount++;
js.skipInstructions = 1;
signExtend = true;
}

// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
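On the lXXx() hunk above: PowerPC has no sign-extending byte load, so a signed byte is loaded as lbz (zero-extend) followed by extsb (opcode 31, extended opcode 954, matching the check in the diff); x86 does have one, so when the pair is adjacent the JIT simply sets signExtend on the load and skips the extsb. A tiny host-side model of why the fusion is sound (plain C++, not emitter code; register and addressing details are omitted):

#include <cstdint>

// lbz rD, 0(rA); extsb rD, rD   -- the two-instruction PPC sequence.
uint32_t LoadByteThenSignExtend(const uint8_t* mem)
{
	uint32_t rD = mem[0];                                   // lbz: zero-extended byte load
	return static_cast<uint32_t>(static_cast<int8_t>(rD));  // extsb: sign-extend the low byte
}

// The merged form: one sign-extending byte load (what movsx gives you on x86).
uint32_t SignExtendingLoad(const uint8_t* mem)
{
	return static_cast<uint32_t>(static_cast<int8_t>(mem[0]));
}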
@@ -282,38 +282,38 @@ void Jit64::mfspr(UGeckoInstruction inst)
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));

// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = js.next_inst.RD;
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) &&
PowerPC::GetState() != PowerPC::CPU_STEPPING && n != d)
if (MergeAllowedNextInstructions(1))
{
js.downcountAmount++;
js.skipnext = true;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
}
else
{
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
const UGeckoInstruction& next = js.op[1].inst;
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
int n = next.RD;
if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
{
js.downcountAmount++;
js.skipInstructions = 1;
gpr.Lock(d, n);
gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(RAX));
break;
}
}
gpr.UnlockAllX();
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(RAX));
break;
}
case SPR_XER:
@@ -341,6 +341,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
break;
}
gpr.UnlockAllX();
gpr.UnlockAll();
}
@@ -610,16 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += opinfo->numCycles;

if (i == (code_block.m_num_instructions - 1))
{
js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}

u32 function = HLE::GetFunctionIndex(ops[i].address);
if (function != 0)
@@ -443,7 +443,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);

js.skipnext = false;
js.skipInstructions = 0;
js.compilerPC = nextPC;

// Translate instructions
@@ -459,13 +459,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}

if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
@@ -232,7 +232,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
js.blockStart = em_address;
js.fifoBytesThisBlock = 0;
js.downcountAmount = 0;
js.skipnext = false;
js.skipInstructions = 0;
js.curBlock = b;

u32 nextPC = em_address;
@@ -281,13 +281,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
{
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
}
else
{
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}

if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
@@ -65,9 +65,7 @@ protected:
struct JitState
{
u32 compilerPC;
u32 next_compilerPC;
u32 blockStart;
UGeckoInstruction next_inst; // for easy peephole opt.
int instructionNumber;
int instructionsLeft;
int downcountAmount;
@@ -88,10 +86,9 @@ protected:
bool firstFPInstructionFound;
bool isLastInstruction;
bool memcheck;
bool skipnext;
int skipInstructions;
bool carryFlagSet;
bool carryFlagInverted;
bool next_inst_bp;

int fifoBytesThisBlock;
@@ -99,7 +96,6 @@ protected:
PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp* op;
PPCAnalyst::CodeOp* next_op;
u8* rewriteStart;

JitBlock *curBlock;
@@ -219,6 +219,11 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
const GekkoOPInfo *b_info = b.opinfo;
int a_flags = a_info->flags;
int b_flags = b_info->flags;

// can't reorder around breakpoints
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
(PowerPC::breakpoints.IsAddressBreakPoint(a.address) || PowerPC::breakpoints.IsAddressBreakPoint(b.address)))
return false;
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
return false;
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
@@ -462,7 +467,8 @@ void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
// Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
// picky about this, but cror seems to almost solely be used for this purpose in real code.
// Additionally, the other boolean ops seem to almost never be used.
ReorderInstructionsCore(instructions, code, true, REORDER_CROR);
if (HasOption(OPTION_CROR_MERGE))
ReorderInstructionsCore(instructions, code, true, REORDER_CROR);
// For carry, bubble instructions *towards* each other; one direction often isn't enough
// to get pairs like addc/adde next to each other.
if (HasOption(OPTION_CARRY_MERGE))
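The merges above can only fire when the two instructions end up adjacent, which is what this ReorderInstructions() change arranges: cror is bubbled upward toward its fcmp, and carry instructions are bubbled in both directions so pairs like addc/adde meet. A toy version of the upward pass follows; it is not Dolphin's ReorderInstructionsCore(), and the dependence check is a stand-in for the real CanSwapAdjacentOps():

#include <cstddef>
#include <utility>
#include <vector>

struct Op { bool isCror; bool isFcmp; };

// Stand-in for CanSwapAdjacentOps(): pretend every pair is independent.
static bool CanSwap(const Op&, const Op&) { return true; }

// Bubble each cror upward until it sits directly after an fcmp (or a swap
// would be unsafe), so the fcmp+cror merge in FloatCompare() can trigger.
static void BubbleCrorUp(std::vector<Op>& ops)
{
	for (size_t i = 1; i < ops.size(); i++)
	{
		if (!ops[i].isCror)
			continue;
		size_t j = i;
		while (j > 0 && !ops[j - 1].isFcmp && CanSwap(ops[j - 1], ops[j]))
		{
			std::swap(ops[j - 1], ops[j]);
			j--;
		}
	}
}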
@@ -214,6 +214,9 @@ public:
// Reorder carry instructions next to their associated branches and pass
// carry flags in the x86 flags between them, instead of in XER.
OPTION_CARRY_MERGE = (1 << 5),

// Reorder cror instructions next to their associated fcmp.
OPTION_CROR_MERGE = (1 << 6),
};