JIT: make instruction merging generic
Merging sequences longer than two instructions should now be easier. This also corrects some minor behavioral inconsistencies between the different instruction-merging cases.
This commit is contained in:
parent
074f246c69
commit
e8cfcd3aeb
|
@ -522,6 +522,7 @@ void Jit64::Jit(u32 em_address)
|
||||||
jo.enableBlocklink = false;
|
jo.enableBlocklink = false;
|
||||||
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
|
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
|
||||||
|
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
|
||||||
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
analyzer.ClearOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
||||||
}
|
}
|
||||||
Trace();
|
Trace();
|
||||||
|
@ -603,7 +604,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
|
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
|
||||||
|
|
||||||
js.skipnext = false;
|
js.skipInstructions = 0;
|
||||||
js.carryFlagSet = false;
|
js.carryFlagSet = false;
|
||||||
js.carryFlagInverted = false;
|
js.carryFlagInverted = false;
|
||||||
js.assumeNoPairedQuantize = false;
|
js.assumeNoPairedQuantize = false;
|
||||||
|
@ -651,12 +652,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
|
|
||||||
if (i == (code_block.m_num_instructions - 1))
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
|
||||||
js.isLastInstruction = true;
|
|
||||||
js.next_inst = 0;
|
|
||||||
js.next_inst_bp = false;
|
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
PROFILER_VPUSH;
|
PROFILER_VPUSH;
|
||||||
// get end tic
|
// get end tic
|
||||||
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
|
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
|
||||||
|
@ -664,14 +662,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
PROFILER_UPDATE_TIME(b);
|
PROFILER_UPDATE_TIME(b);
|
||||||
PROFILER_VPOP;
|
PROFILER_VPOP;
|
||||||
}
|
}
|
||||||
}
|
js.isLastInstruction = true;
|
||||||
else
|
|
||||||
{
|
|
||||||
// help peephole optimizations
|
|
||||||
js.next_inst = ops[i + 1].inst;
|
|
||||||
js.next_compilerPC = ops[i + 1].address;
|
|
||||||
js.next_op = &ops[i + 1];
|
|
||||||
js.next_inst_bp = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && breakpoints.IsAddressBreakPoint(ops[i + 1].address);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||||
|
@ -856,11 +847,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
|
//NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (js.skipnext)
|
i += js.skipInstructions;
|
||||||
{
|
js.skipInstructions = 0;
|
||||||
js.skipnext = false;
|
|
||||||
i++; // Skip next instruction
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 function = HLE::GetFunctionIndex(js.blockStart);
|
u32 function = HLE::GetFunctionIndex(js.blockStart);
|
||||||
|
@ -919,5 +907,6 @@ void Jit64::EnableOptimization()
|
||||||
{
|
{
|
||||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
|
||||||
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE);
|
||||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,6 +115,7 @@ public:
|
||||||
void GenerateConstantOverflow(bool overflow);
|
void GenerateConstantOverflow(bool overflow);
|
||||||
void GenerateConstantOverflow(s64 val);
|
void GenerateConstantOverflow(s64 val);
|
||||||
void GenerateOverflow();
|
void GenerateOverflow();
|
||||||
|
bool MergeAllowedNextInstructions(int count);
|
||||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||||
void FinalizeCarry(Gen::CCFlags cond);
|
void FinalizeCarry(Gen::CCFlags cond);
|
||||||
void FinalizeCarry(bool ca);
|
void FinalizeCarry(bool ca);
|
||||||
|
|
|
@ -346,10 +346,12 @@ void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
|
||||||
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };
|
int output[4] = { CR_SO, CR_EQ, CR_GT, CR_LT };
|
||||||
|
|
||||||
// Merge neighboring fcmp and cror (the primary use of cror).
|
// Merge neighboring fcmp and cror (the primary use of cror).
|
||||||
UGeckoInstruction next = js.next_inst;
|
UGeckoInstruction next = js.op[1].inst;
|
||||||
if (next.OPCD == 19 && next.SUBOP10 == 449 && (next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
|
if (analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CROR_MERGE) &&
|
||||||
|
MergeAllowedNextInstructions(1) && next.OPCD == 19 && next.SUBOP10 == 449 &&
|
||||||
|
(next.CRBA >> 2) == crf && (next.CRBB >> 2) == crf && (next.CRBD >> 2) == crf)
|
||||||
{
|
{
|
||||||
js.skipnext = true;
|
js.skipInstructions = 1;
|
||||||
js.downcountAmount++;
|
js.downcountAmount++;
|
||||||
int dst = 3 - (next.CRBD & 3);
|
int dst = 3 - (next.CRBD & 3);
|
||||||
output[3 - (next.CRBD & 3)] &= ~(1 << dst);
|
output[3 - (next.CRBD & 3)] &= ~(1 << dst);
|
||||||
|
|
|
@ -50,14 +50,30 @@ void Jit64::GenerateOverflow()
|
||||||
SetJumpTarget(exit);
|
SetJumpTarget(exit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the instruction currently being compiled may be merged with
// the `count` instructions that follow it (js.op[1] .. js.op[count]).
//
// Returns false when:
//   - the CPU state is CPU_STEPPING,
//   - js.instructionsLeft is smaller than `count`,
//   - debugging is enabled and any of the following `count` instructions has
//     a breakpoint on its address, or
//   - any of the following `count` instructions is marked as a branch target.
// Returns true otherwise.
//
// Callers use this as the common gate before setting js.skipInstructions or
// passing the carry flag to the next instruction in host flags.
bool Jit64::MergeAllowedNextInstructions(int count)
|
||||||
|
{
|
||||||
|
// Refuse to merge while the CPU state is CPU_STEPPING, or when the requested
// look-ahead exceeds js.instructionsLeft.
if (PowerPC::GetState() == PowerPC::CPU_STEPPING || js.instructionsLeft < count)
|
||||||
|
return false;
|
||||||
|
// Be careful: a breakpoint kills flags in between instructions
|
||||||
|
// Index 0 is the current instruction, so the look-ahead starts at js.op[1].
for (int i = 1; i <= count; i++)
|
||||||
|
{
|
||||||
|
// Breakpoints are only consulted when debugging is enabled.
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
|
||||||
|
PowerPC::breakpoints.IsAddressBreakPoint(js.op[i].address))
|
||||||
|
return false;
|
||||||
|
// NOTE(review): presumably a branch target cannot be folded into its
// predecessor because other code may jump straight to it - confirm.
if (js.op[i].isBranchTarget)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::FinalizeCarry(CCFlags cond)
|
void Jit64::FinalizeCarry(CCFlags cond)
|
||||||
{
|
{
|
||||||
js.carryFlagSet = false;
|
js.carryFlagSet = false;
|
||||||
js.carryFlagInverted = false;
|
js.carryFlagInverted = false;
|
||||||
if (js.op->wantsCA)
|
if (js.op->wantsCA)
|
||||||
{
|
{
|
||||||
// Be careful: a breakpoint kills flags in between instructions
|
// Not actually merging instructions, but the effect is equivalent (we can't have breakpoints/etc in between).
|
||||||
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
|
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
|
||||||
{
|
{
|
||||||
if (cond == CC_C || cond == CC_NC)
|
if (cond == CC_C || cond == CC_NC)
|
||||||
{
|
{
|
||||||
|
@ -86,7 +102,7 @@ void Jit64::FinalizeCarry(bool ca)
|
||||||
js.carryFlagInverted = false;
|
js.carryFlagInverted = false;
|
||||||
if (js.op->wantsCA)
|
if (js.op->wantsCA)
|
||||||
{
|
{
|
||||||
if (!js.isLastInstruction && js.next_op->wantsCAInFlags && !js.next_inst_bp)
|
if (MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags)
|
||||||
{
|
{
|
||||||
if (ca)
|
if (ca)
|
||||||
STC();
|
STC();
|
||||||
|
@ -331,7 +347,10 @@ bool Jit64::CheckMergedBranch(int crf)
|
||||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const UGeckoInstruction& next = js.next_inst;
|
if (!MergeAllowedNextInstructions(1))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
return (((next.OPCD == 16 /* bcx */) ||
|
return (((next.OPCD == 16 /* bcx */) ||
|
||||||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
|
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
|
||||||
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
|
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
|
||||||
|
@ -343,33 +362,35 @@ bool Jit64::CheckMergedBranch(int crf)
|
||||||
void Jit64::DoMergedBranch()
|
void Jit64::DoMergedBranch()
|
||||||
{
|
{
|
||||||
// Code that handles successful PPC branching.
|
// Code that handles successful PPC branching.
|
||||||
if (js.next_inst.OPCD == 16) // bcx
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
|
const u32 nextPC = js.op[1].address;
|
||||||
|
if (next.OPCD == 16) // bcx
|
||||||
{
|
{
|
||||||
if (js.next_inst.LK)
|
if (next.LK)
|
||||||
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
MOV(32, M(&LR), Imm32(nextPC + 4));
|
||||||
|
|
||||||
u32 destination;
|
u32 destination;
|
||||||
if (js.next_inst.AA)
|
if (next.AA)
|
||||||
destination = SignExt16(js.next_inst.BD << 2);
|
destination = SignExt16(next.BD << 2);
|
||||||
else
|
else
|
||||||
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
destination = nextPC + SignExt16(next.BD << 2);
|
||||||
WriteExit(destination, js.next_inst.LK, js.next_compilerPC + 4);
|
WriteExit(destination, next.LK, nextPC + 4);
|
||||||
}
|
}
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
|
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
|
||||||
{
|
{
|
||||||
if (js.next_inst.LK)
|
if (next.LK)
|
||||||
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
MOV(32, M(&LR), Imm32(nextPC + 4));
|
||||||
MOV(32, R(RSCRATCH), M(&CTR));
|
MOV(32, R(RSCRATCH), M(&CTR));
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||||
WriteExitDestInRSCRATCH(js.next_inst.LK, js.next_compilerPC + 4);
|
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
|
||||||
}
|
}
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
|
||||||
{
|
{
|
||||||
MOV(32, R(RSCRATCH), M(&LR));
|
MOV(32, R(RSCRATCH), M(&LR));
|
||||||
if (!m_enable_blr_optimization)
|
if (!m_enable_blr_optimization)
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||||
if (js.next_inst.LK)
|
if (next.LK)
|
||||||
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
MOV(32, M(&LR), Imm32(nextPC + 4));
|
||||||
WriteBLRExit();
|
WriteBLRExit();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -381,9 +402,11 @@ void Jit64::DoMergedBranch()
|
||||||
void Jit64::DoMergedBranchCondition()
|
void Jit64::DoMergedBranchCondition()
|
||||||
{
|
{
|
||||||
js.downcountAmount++;
|
js.downcountAmount++;
|
||||||
js.skipnext = true;
|
js.skipInstructions = 1;
|
||||||
int test_bit = 8 >> (js.next_inst.BI & 3);
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
|
int test_bit = 8 >> (next.BI & 3);
|
||||||
|
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
|
||||||
|
const u32 nextPC = js.op[1].address;
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
@ -408,16 +431,18 @@ void Jit64::DoMergedBranchCondition()
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
WriteExit(js.next_compilerPC + 4);
|
WriteExit(nextPC + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::DoMergedBranchImmediate(s64 val)
|
void Jit64::DoMergedBranchImmediate(s64 val)
|
||||||
{
|
{
|
||||||
js.downcountAmount++;
|
js.downcountAmount++;
|
||||||
js.skipnext = true;
|
js.skipInstructions = 1;
|
||||||
int test_bit = 8 >> (js.next_inst.BI & 3);
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
|
int test_bit = 8 >> (next.BI & 3);
|
||||||
|
bool condition = !!(next.BO & BO_BRANCH_IF_TRUE);
|
||||||
|
const u32 nextPC = js.op[1].address;
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
@ -441,7 +466,7 @@ void Jit64::DoMergedBranchImmediate(s64 val)
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
WriteExit(js.next_compilerPC + 4);
|
WriteExit(nextPC + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -95,16 +95,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
|
// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find it.
|
||||||
if (accessSize == 8 && js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 954 &&
|
if (MergeAllowedNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && js.op[1].inst.SUBOP10 == 954 &&
|
||||||
js.next_inst.RS == inst.RD && js.next_inst.RA == inst.RD && !js.next_inst.Rc)
|
js.op[1].inst.RS == inst.RD && js.op[1].inst.RA == inst.RD && !js.op[1].inst.Rc)
|
||||||
{
|
|
||||||
if (PowerPC::GetState() != PowerPC::CPU_STEPPING)
|
|
||||||
{
|
{
|
||||||
js.downcountAmount++;
|
js.downcountAmount++;
|
||||||
js.skipnext = true;
|
js.skipInstructions = 1;
|
||||||
signExtend = true;
|
signExtend = true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||||
// Will give nice boost to dual core mode
|
// Will give nice boost to dual core mode
|
||||||
|
|
|
@ -282,16 +282,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
ADD(64, R(RAX), R(RDX));
|
ADD(64, R(RAX), R(RDX));
|
||||||
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
|
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
|
||||||
|
|
||||||
|
if (MergeAllowedNextInstructions(1))
|
||||||
|
{
|
||||||
|
const UGeckoInstruction& next = js.op[1].inst;
|
||||||
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
|
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
|
||||||
// if we can.
|
// if we can.
|
||||||
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
|
u32 nextIndex = (next.SPRU << 5) | (next.SPRL & 0x1F);
|
||||||
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
|
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
|
||||||
int n = js.next_inst.RD;
|
int n = next.RD;
|
||||||
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) &&
|
if (next.OPCD == 31 && next.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL) && n != d)
|
||||||
PowerPC::GetState() != PowerPC::CPU_STEPPING && n != d)
|
|
||||||
{
|
{
|
||||||
js.downcountAmount++;
|
js.downcountAmount++;
|
||||||
js.skipnext = true;
|
js.skipInstructions = 1;
|
||||||
gpr.Lock(d, n);
|
gpr.Lock(d, n);
|
||||||
gpr.BindToRegister(d, false);
|
gpr.BindToRegister(d, false);
|
||||||
gpr.BindToRegister(n, false);
|
gpr.BindToRegister(n, false);
|
||||||
|
@ -304,16 +306,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
MOV(32, gpr.R(d), R(RAX));
|
MOV(32, gpr.R(d), R(RAX));
|
||||||
if (nextIndex == SPR_TU)
|
if (nextIndex == SPR_TU)
|
||||||
MOV(32, gpr.R(n), R(RAX));
|
MOV(32, gpr.R(n), R(RAX));
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
gpr.Lock(d);
|
gpr.Lock(d);
|
||||||
gpr.BindToRegister(d, false);
|
gpr.BindToRegister(d, false);
|
||||||
if (iIndex == SPR_TU)
|
if (iIndex == SPR_TU)
|
||||||
SHR(64, R(RAX), Imm8(32));
|
SHR(64, R(RAX), Imm8(32));
|
||||||
MOV(32, gpr.R(d), R(RAX));
|
MOV(32, gpr.R(d), R(RAX));
|
||||||
}
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SPR_XER:
|
case SPR_XER:
|
||||||
|
@ -341,6 +341,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
|
MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
gpr.UnlockAllX();
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -610,16 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
js.downcountAmount += opinfo->numCycles;
|
js.downcountAmount += opinfo->numCycles;
|
||||||
|
|
||||||
if (i == (code_block.m_num_instructions - 1))
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
js.next_inst = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// help peephole optimizations
|
|
||||||
js.next_inst = ops[i + 1].inst;
|
|
||||||
js.next_compilerPC = ops[i + 1].address;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 function = HLE::GetFunctionIndex(ops[i].address);
|
u32 function = HLE::GetFunctionIndex(ops[i].address);
|
||||||
if (function != 0)
|
if (function != 0)
|
||||||
|
|
|
@ -443,7 +443,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
|
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
||||||
|
|
||||||
js.skipnext = false;
|
js.skipInstructions = 0;
|
||||||
js.compilerPC = nextPC;
|
js.compilerPC = nextPC;
|
||||||
|
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
|
@ -459,13 +459,6 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
js.next_inst = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// help peephole optimizations
|
|
||||||
js.next_inst = ops[i + 1].inst;
|
|
||||||
js.next_compilerPC = ops[i + 1].address;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||||
|
|
|
@ -232,7 +232,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
js.fifoBytesThisBlock = 0;
|
js.fifoBytesThisBlock = 0;
|
||||||
js.downcountAmount = 0;
|
js.downcountAmount = 0;
|
||||||
js.skipnext = false;
|
js.skipInstructions = 0;
|
||||||
js.curBlock = b;
|
js.curBlock = b;
|
||||||
|
|
||||||
u32 nextPC = em_address;
|
u32 nextPC = em_address;
|
||||||
|
@ -281,13 +281,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
{
|
{
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
js.next_inst = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// help peephole optimizations
|
|
||||||
js.next_inst = ops[i + 1].inst;
|
|
||||||
js.next_compilerPC = ops[i + 1].address;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||||
|
|
|
@ -65,9 +65,7 @@ protected:
|
||||||
struct JitState
|
struct JitState
|
||||||
{
|
{
|
||||||
u32 compilerPC;
|
u32 compilerPC;
|
||||||
u32 next_compilerPC;
|
|
||||||
u32 blockStart;
|
u32 blockStart;
|
||||||
UGeckoInstruction next_inst; // for easy peephole opt.
|
|
||||||
int instructionNumber;
|
int instructionNumber;
|
||||||
int instructionsLeft;
|
int instructionsLeft;
|
||||||
int downcountAmount;
|
int downcountAmount;
|
||||||
|
@ -88,10 +86,9 @@ protected:
|
||||||
bool firstFPInstructionFound;
|
bool firstFPInstructionFound;
|
||||||
bool isLastInstruction;
|
bool isLastInstruction;
|
||||||
bool memcheck;
|
bool memcheck;
|
||||||
bool skipnext;
|
int skipInstructions;
|
||||||
bool carryFlagSet;
|
bool carryFlagSet;
|
||||||
bool carryFlagInverted;
|
bool carryFlagInverted;
|
||||||
bool next_inst_bp;
|
|
||||||
|
|
||||||
int fifoBytesThisBlock;
|
int fifoBytesThisBlock;
|
||||||
|
|
||||||
|
@ -99,7 +96,6 @@ protected:
|
||||||
PPCAnalyst::BlockRegStats gpa;
|
PPCAnalyst::BlockRegStats gpa;
|
||||||
PPCAnalyst::BlockRegStats fpa;
|
PPCAnalyst::BlockRegStats fpa;
|
||||||
PPCAnalyst::CodeOp* op;
|
PPCAnalyst::CodeOp* op;
|
||||||
PPCAnalyst::CodeOp* next_op;
|
|
||||||
u8* rewriteStart;
|
u8* rewriteStart;
|
||||||
|
|
||||||
JitBlock *curBlock;
|
JitBlock *curBlock;
|
||||||
|
|
|
@ -219,6 +219,11 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
const GekkoOPInfo *b_info = b.opinfo;
|
const GekkoOPInfo *b_info = b.opinfo;
|
||||||
int a_flags = a_info->flags;
|
int a_flags = a_info->flags;
|
||||||
int b_flags = b_info->flags;
|
int b_flags = b_info->flags;
|
||||||
|
|
||||||
|
// can't reorder around breakpoints
|
||||||
|
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging &&
|
||||||
|
(PowerPC::breakpoints.IsAddressBreakPoint(a.address) || PowerPC::breakpoints.IsAddressBreakPoint(b.address)))
|
||||||
|
return false;
|
||||||
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
|
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
|
||||||
return false;
|
return false;
|
||||||
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
|
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
|
||||||
|
@ -462,6 +467,7 @@ void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
|
||||||
// Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
|
// Reorder cror instructions upwards (e.g. towards an fcmp). Technically we should be more
|
||||||
// picky about this, but cror seems to almost solely be used for this purpose in real code.
|
// picky about this, but cror seems to almost solely be used for this purpose in real code.
|
||||||
// Additionally, the other boolean ops seem to almost never be used.
|
// Additionally, the other boolean ops seem to almost never be used.
|
||||||
|
if (HasOption(OPTION_CROR_MERGE))
|
||||||
ReorderInstructionsCore(instructions, code, true, REORDER_CROR);
|
ReorderInstructionsCore(instructions, code, true, REORDER_CROR);
|
||||||
// For carry, bubble instructions *towards* each other; one direction often isn't enough
|
// For carry, bubble instructions *towards* each other; one direction often isn't enough
|
||||||
// to get pairs like addc/adde next to each other.
|
// to get pairs like addc/adde next to each other.
|
||||||
|
|
|
@ -214,6 +214,9 @@ public:
|
||||||
// Reorder carry instructions next to their associated branches and pass
|
// Reorder carry instructions next to their associated branches and pass
|
||||||
// carry flags in the x86 flags between them, instead of in XER.
|
// carry flags in the x86 flags between them, instead of in XER.
|
||||||
OPTION_CARRY_MERGE = (1 << 5),
|
OPTION_CARRY_MERGE = (1 << 5),
|
||||||
|
|
||||||
|
// Reorder cror instructions next to their associated fcmp.
|
||||||
|
OPTION_CROR_MERGE = (1 << 6),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue