Merge cmp/branch when possible. The speedup is smaller than expected (nearly unnoticeable) :P It is slightly bigger if you enable line 291 in ppcanalyst.cpp, but that's not fully tested yet.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1551 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-12-15 22:57:11 +00:00
parent 060523a805
commit 3de4df29a3
4 changed files with 86 additions and 34 deletions

View File

@ -185,7 +185,6 @@ namespace Jit64
jo.assumeFPLoadFromMem = true;
jo.fpAccurateFlags = true;
jo.optimizeGatherPipe = true;
jo.interpretFPU = false;
jo.fastInterrupts = false;
}
@ -316,6 +315,7 @@ namespace Jit64
js.curBlock = &b;
js.blockSetsQuantizers = false;
js.block_flags = 0;
js.cancel = false;
//Analyze the block, collect all instructions it is made of (including inlining,
//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
@ -384,6 +384,7 @@ namespace Jit64
js.op = &ops[i];
js.instructionNumber = i;
if (i == (int)size - 1) {
// WARNING - cmp->branch merging will screw this up.
js.isLastInstruction = true;
js.next_inst = 0;
if (Profiler::g_ProfileBlocks) {
@ -398,27 +399,25 @@ namespace Jit64
} else {
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address;
}
// const GekkoOpInfo *info = GetOpInfo();
if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst))
Default(ops[i].inst);
else
PPCTables::CompileInstruction(ops[i].inst);
gpr.SanityCheck();
fpr.SanityCheck();
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
}
PPCTables::CompileInstruction(ops[i].inst);
gpr.SanityCheck();
fpr.SanityCheck();
if (js.cancel) break;
}
js.compilerPC += 4;
b.flags = js.block_flags;
b.codeSize = (u32)(GetCodePtr() - start);
b.originalSize = js.compilerPC - emaddress;
b.originalSize = size;
return normalEntry;
}
}

View File

@ -48,7 +48,9 @@ namespace Jit64
struct JitState
{
u32 compilerPC;
u32 next_compilerPC;
u32 blockStart;
bool cancel;
UGeckoInstruction next_inst; // for easy peephole opt.
int blockSize;
int instructionNumber;
@ -76,7 +78,6 @@ namespace Jit64
bool fpAccurateFlags;
bool enableFastMem;
bool optimizeGatherPipe;
bool interpretFPU;
bool fastInterrupts;
};

View File

@ -154,14 +154,24 @@ namespace Jit64
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
// Should check if the next instruction is a branch - if it is, merge the two. This can save
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
// towards branches.
INSTRUCTION_START;
int a = inst.RA;
int b = inst.RB;
int crf = inst.CRFD;
int shift = crf * 4;
bool merge_branch = false;
int test_crf = js.next_inst.BI >> 2;
// Check if the next instruction is a branch - if it is, merge the two.
if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
!(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) {
// Looks like a decent conditional branch that we can merge with.
// It only tests CR, not CTR.
if (test_crf == crf) {
merge_branch = true;
}
}
Gen::CCFlags less_than, greater_than;
OpArg comparand;
if (inst.OPCD == 31) {
@ -193,10 +203,36 @@ namespace Jit64
}
}
if (!merge_branch) {
// Keep the normal code separate for clarity.
CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
FixupBranch continue1 = J();
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
FixupBranch continue2 = J();
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
} else {
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = (js.next_inst.BO & 8) ? false : true;
u32 destination;
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
CMP(32, gpr.R(a), comparand);
gpr.UnlockAll();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
FixupBranch continue1 = J();
@ -206,14 +242,25 @@ namespace Jit64
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
SetJumpTarget(continue1);
SetJumpTarget(continue2);
FixupBranch continue3;
if (!!(8 & test_bit) == condition) continue3 = J();
//if (!!(8 & test_bit) != condition) SetJumpTarget(continue3);
if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
WriteExit(destination, 0);
if (!!(8 & test_bit) == condition) SetJumpTarget(continue3);
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
WriteExit(js.next_compilerPC + 4, 1);
js.cancel = true;
}
gpr.UnlockAll();
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
// Since it's the last block, some liberties can be taken.
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
}
void orx(UGeckoInstruction inst)

View File

@ -266,9 +266,14 @@ void DolphinApp::OnEndSession()
// Pops up a message box with the given caption and text.
//
// caption - window title of the box.
// text    - message shown inside the box.
// yes_no  - when true the box offers Yes/No buttons, otherwise a single OK.
//
// Returns true only when the box reports the "Yes" result code; every other
// result yields false.
bool wxMsgAlert(const char* caption, const char* text,
                bool yes_no) {
#ifdef _WIN32
	// Native box with no parent window, so the debug window is not blocked.
	const unsigned int boxStyle = yes_no ? MB_YESNO : MB_OK;
	const int answer = MessageBox(0, text, caption, boxStyle);
	return answer == IDYES;
#else
	// Everywhere else, go through wxWidgets; the strings are converted from ASCII.
	const int boxStyle = yes_no ? wxYES_NO : wxOK;
	const int answer = wxMessageBox(wxString::FromAscii(text),
	                                wxString::FromAscii(caption),
	                                boxStyle);
	return answer == wxYES;
#endif
}