Merge cmp/branch when possible. Smaller than expected (nearly unnoticeable) speedup :P Slightly bigger if you enable line 291 in ppcanalyst.cpp, but that's not fully tested yet.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1551 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
060523a805
commit
3de4df29a3
|
@ -185,7 +185,6 @@ namespace Jit64
|
|||
jo.assumeFPLoadFromMem = true;
|
||||
jo.fpAccurateFlags = true;
|
||||
jo.optimizeGatherPipe = true;
|
||||
jo.interpretFPU = false;
|
||||
jo.fastInterrupts = false;
|
||||
}
|
||||
|
||||
|
@ -316,6 +315,7 @@ namespace Jit64
|
|||
js.curBlock = &b;
|
||||
js.blockSetsQuantizers = false;
|
||||
js.block_flags = 0;
|
||||
js.cancel = false;
|
||||
|
||||
//Analyze the block, collect all instructions it is made of (including inlining,
|
||||
//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||
|
@ -384,6 +384,7 @@ namespace Jit64
|
|||
js.op = &ops[i];
|
||||
js.instructionNumber = i;
|
||||
if (i == (int)size - 1) {
|
||||
// WARNING - cmp->branch merging will screw this up.
|
||||
js.isLastInstruction = true;
|
||||
js.next_inst = 0;
|
||||
if (Profiler::g_ProfileBlocks) {
|
||||
|
@ -398,27 +399,25 @@ namespace Jit64
|
|||
} else {
|
||||
// help peephole optimizations
|
||||
js.next_inst = ops[i + 1].inst;
|
||||
js.next_compilerPC = ops[i + 1].address;
|
||||
}
|
||||
|
||||
// const GekkoOpInfo *info = GetOpInfo();
|
||||
if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst))
|
||||
Default(ops[i].inst);
|
||||
else
|
||||
PPCTables::CompileInstruction(ops[i].inst);
|
||||
|
||||
gpr.SanityCheck();
|
||||
fpr.SanityCheck();
|
||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||
{
|
||||
js.fifoBytesThisBlock -= 32;
|
||||
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
|
||||
}
|
||||
|
||||
PPCTables::CompileInstruction(ops[i].inst);
|
||||
|
||||
gpr.SanityCheck();
|
||||
fpr.SanityCheck();
|
||||
if (js.cancel) break;
|
||||
}
|
||||
js.compilerPC += 4;
|
||||
|
||||
b.flags = js.block_flags;
|
||||
b.codeSize = (u32)(GetCodePtr() - start);
|
||||
b.originalSize = js.compilerPC - emaddress;
|
||||
b.originalSize = size;
|
||||
return normalEntry;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,9 @@ namespace Jit64
|
|||
struct JitState
|
||||
{
|
||||
u32 compilerPC;
|
||||
u32 next_compilerPC;
|
||||
u32 blockStart;
|
||||
bool cancel;
|
||||
UGeckoInstruction next_inst; // for easy peephole opt.
|
||||
int blockSize;
|
||||
int instructionNumber;
|
||||
|
@ -76,7 +78,6 @@ namespace Jit64
|
|||
bool fpAccurateFlags;
|
||||
bool enableFastMem;
|
||||
bool optimizeGatherPipe;
|
||||
bool interpretFPU;
|
||||
bool fastInterrupts;
|
||||
};
|
||||
|
||||
|
|
|
@ -154,14 +154,24 @@ namespace Jit64
|
|||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
#endif
|
||||
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
||||
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
||||
// towards branches.
|
||||
INSTRUCTION_START;
|
||||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
int crf = inst.CRFD;
|
||||
int shift = crf * 4;
|
||||
|
||||
bool merge_branch = false;
|
||||
int test_crf = js.next_inst.BI >> 2;
|
||||
// Check if the next instruction is a branch - if it is, merge the two.
|
||||
if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
|
||||
!(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) {
|
||||
// Looks like a decent conditional branch that we can merge with.
|
||||
// It only tests CR, not CTR.
|
||||
if (test_crf == crf) {
|
||||
merge_branch = true;
|
||||
}
|
||||
}
|
||||
|
||||
Gen::CCFlags less_than, greater_than;
|
||||
OpArg comparand;
|
||||
if (inst.OPCD == 31) {
|
||||
|
@ -193,27 +203,64 @@ namespace Jit64
|
|||
}
|
||||
}
|
||||
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||
FixupBranch continue2 = J();
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
if (!merge_branch) {
|
||||
// Keep the normal code separate for clarity.
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||
FixupBranch continue2 = J();
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
} else {
|
||||
int test_bit = 8 >> (js.next_inst.BI & 3);
|
||||
bool condition = (js.next_inst.BO & 8) ? false : true;
|
||||
|
||||
u32 destination;
|
||||
if (js.next_inst.AA)
|
||||
destination = SignExt16(js.next_inst.BD << 2);
|
||||
else
|
||||
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
||||
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
gpr.UnlockAll();
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||
FixupBranch continue2 = J();
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||
FixupBranch continue3;
|
||||
if (!!(8 & test_bit) == condition) continue3 = J();
|
||||
|
||||
//if (!!(8 & test_bit) != condition) SetJumpTarget(continue3);
|
||||
if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
|
||||
if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
|
||||
|
||||
WriteExit(destination, 0);
|
||||
|
||||
if (!!(8 & test_bit) == condition) SetJumpTarget(continue3);
|
||||
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
|
||||
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
|
||||
|
||||
WriteExit(js.next_compilerPC + 4, 1);
|
||||
|
||||
js.cancel = true;
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
|
||||
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
|
||||
// Since it's the last block, some liberties can be taken.
|
||||
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
|
||||
}
|
||||
|
||||
void orx(UGeckoInstruction inst)
|
||||
|
|
|
@ -266,9 +266,14 @@ void DolphinApp::OnEndSession()
|
|||
|
||||
// Shows a modal message box with the given caption and text.
//
// caption - title of the message box.
// text    - message shown in the box.
// yes_no  - true: show Yes/No buttons; false: show a single OK button.
//
// Returns true only when the value returned by the underlying message box
// call equals the "Yes" code (IDYES on Windows, wxYES elsewhere).
// NOTE(review): when yes_no is false the box only offers OK, yet the result
// is still compared against the Yes code, so this presumably returns false in
// that case - confirm that callers ignore the result for OK-only alerts.
bool wxMsgAlert(const char* caption, const char* text,
|
||||
bool yes_no) {
|
||||
#ifdef _WIN32
|
||||
// Windows build: call MessageBox directly with 0 as the owner window.
// I like parentless messageboxes - don't block the debug window.
|
||||
return IDYES == MessageBox(0, text, caption, yes_no?MB_YESNO:MB_OK);
|
||||
#else
|
||||
// Other platforms: go through wxMessageBox, converting both C strings
// with wxString::FromAscii.
return wxYES == wxMessageBox(wxString::FromAscii(text),
|
||||
wxString::FromAscii(caption),
|
||||
(yes_no)?wxYES_NO:wxOK);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue