Merge cmp/branch when possible. Smaller than expected (nearly unnoticeable) speedup :P Slightly bigger if you enable line 291 in ppcanalyst.cpp, but that's not fully tested yet.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1551 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
060523a805
commit
3de4df29a3
|
@ -185,7 +185,6 @@ namespace Jit64
|
||||||
jo.assumeFPLoadFromMem = true;
|
jo.assumeFPLoadFromMem = true;
|
||||||
jo.fpAccurateFlags = true;
|
jo.fpAccurateFlags = true;
|
||||||
jo.optimizeGatherPipe = true;
|
jo.optimizeGatherPipe = true;
|
||||||
jo.interpretFPU = false;
|
|
||||||
jo.fastInterrupts = false;
|
jo.fastInterrupts = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,6 +315,7 @@ namespace Jit64
|
||||||
js.curBlock = &b;
|
js.curBlock = &b;
|
||||||
js.blockSetsQuantizers = false;
|
js.blockSetsQuantizers = false;
|
||||||
js.block_flags = 0;
|
js.block_flags = 0;
|
||||||
|
js.cancel = false;
|
||||||
|
|
||||||
//Analyze the block, collect all instructions it is made of (including inlining,
|
//Analyze the block, collect all instructions it is made of (including inlining,
|
||||||
//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
|
||||||
|
@ -384,6 +384,7 @@ namespace Jit64
|
||||||
js.op = &ops[i];
|
js.op = &ops[i];
|
||||||
js.instructionNumber = i;
|
js.instructionNumber = i;
|
||||||
if (i == (int)size - 1) {
|
if (i == (int)size - 1) {
|
||||||
|
// WARNING - cmp->branch merging will screw this up.
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
js.next_inst = 0;
|
js.next_inst = 0;
|
||||||
if (Profiler::g_ProfileBlocks) {
|
if (Profiler::g_ProfileBlocks) {
|
||||||
|
@ -398,27 +399,25 @@ namespace Jit64
|
||||||
} else {
|
} else {
|
||||||
// help peephole optimizations
|
// help peephole optimizations
|
||||||
js.next_inst = ops[i + 1].inst;
|
js.next_inst = ops[i + 1].inst;
|
||||||
|
js.next_compilerPC = ops[i + 1].address;
|
||||||
}
|
}
|
||||||
|
|
||||||
// const GekkoOpInfo *info = GetOpInfo();
|
|
||||||
if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst))
|
|
||||||
Default(ops[i].inst);
|
|
||||||
else
|
|
||||||
PPCTables::CompileInstruction(ops[i].inst);
|
|
||||||
|
|
||||||
gpr.SanityCheck();
|
|
||||||
fpr.SanityCheck();
|
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||||
{
|
{
|
||||||
js.fifoBytesThisBlock -= 32;
|
js.fifoBytesThisBlock -= 32;
|
||||||
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
|
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PPCTables::CompileInstruction(ops[i].inst);
|
||||||
|
|
||||||
|
gpr.SanityCheck();
|
||||||
|
fpr.SanityCheck();
|
||||||
|
if (js.cancel) break;
|
||||||
}
|
}
|
||||||
js.compilerPC += 4;
|
|
||||||
|
|
||||||
b.flags = js.block_flags;
|
b.flags = js.block_flags;
|
||||||
b.codeSize = (u32)(GetCodePtr() - start);
|
b.codeSize = (u32)(GetCodePtr() - start);
|
||||||
b.originalSize = js.compilerPC - emaddress;
|
b.originalSize = size;
|
||||||
return normalEntry;
|
return normalEntry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,7 +48,9 @@ namespace Jit64
|
||||||
struct JitState
|
struct JitState
|
||||||
{
|
{
|
||||||
u32 compilerPC;
|
u32 compilerPC;
|
||||||
|
u32 next_compilerPC;
|
||||||
u32 blockStart;
|
u32 blockStart;
|
||||||
|
bool cancel;
|
||||||
UGeckoInstruction next_inst; // for easy peephole opt.
|
UGeckoInstruction next_inst; // for easy peephole opt.
|
||||||
int blockSize;
|
int blockSize;
|
||||||
int instructionNumber;
|
int instructionNumber;
|
||||||
|
@ -76,7 +78,6 @@ namespace Jit64
|
||||||
bool fpAccurateFlags;
|
bool fpAccurateFlags;
|
||||||
bool enableFastMem;
|
bool enableFastMem;
|
||||||
bool optimizeGatherPipe;
|
bool optimizeGatherPipe;
|
||||||
bool interpretFPU;
|
|
||||||
bool fastInterrupts;
|
bool fastInterrupts;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -154,14 +154,24 @@ namespace Jit64
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
#endif
|
#endif
|
||||||
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
|
||||||
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
|
||||||
// towards branches.
|
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
int b = inst.RB;
|
int b = inst.RB;
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
int shift = crf * 4;
|
int shift = crf * 4;
|
||||||
|
|
||||||
|
bool merge_branch = false;
|
||||||
|
int test_crf = js.next_inst.BI >> 2;
|
||||||
|
// Check if the next instruction is a branch - if it is, merge the two.
|
||||||
|
if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
|
||||||
|
!(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) {
|
||||||
|
// Looks like a decent conditional branch that we can merge with.
|
||||||
|
// It only tests CR, not CTR.
|
||||||
|
if (test_crf == crf) {
|
||||||
|
merge_branch = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Gen::CCFlags less_than, greater_than;
|
Gen::CCFlags less_than, greater_than;
|
||||||
OpArg comparand;
|
OpArg comparand;
|
||||||
if (inst.OPCD == 31) {
|
if (inst.OPCD == 31) {
|
||||||
|
@ -193,27 +203,64 @@ namespace Jit64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CMP(32, gpr.R(a), comparand);
|
if (!merge_branch) {
|
||||||
FixupBranch pLesser = J_CC(less_than);
|
// Keep the normal code separate for clarity.
|
||||||
FixupBranch pGreater = J_CC(greater_than);
|
CMP(32, gpr.R(a), comparand);
|
||||||
|
FixupBranch pLesser = J_CC(less_than);
|
||||||
|
FixupBranch pGreater = J_CC(greater_than);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||||
|
FixupBranch continue1 = J();
|
||||||
|
SetJumpTarget(pGreater);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||||
|
FixupBranch continue2 = J();
|
||||||
|
SetJumpTarget(pLesser);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||||
|
SetJumpTarget(continue1);
|
||||||
|
SetJumpTarget(continue2);
|
||||||
|
} else {
|
||||||
|
int test_bit = 8 >> (js.next_inst.BI & 3);
|
||||||
|
bool condition = (js.next_inst.BO & 8) ? false : true;
|
||||||
|
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
u32 destination;
|
||||||
FixupBranch continue1 = J();
|
if (js.next_inst.AA)
|
||||||
|
destination = SignExt16(js.next_inst.BD << 2);
|
||||||
|
else
|
||||||
|
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
||||||
|
|
||||||
SetJumpTarget(pGreater);
|
CMP(32, gpr.R(a), comparand);
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
gpr.UnlockAll();
|
||||||
FixupBranch continue2 = J();
|
gpr.Flush(FLUSH_ALL);
|
||||||
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
FixupBranch pLesser = J_CC(less_than);
|
||||||
|
FixupBranch pGreater = J_CC(greater_than);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||||
|
FixupBranch continue1 = J();
|
||||||
|
|
||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pGreater);
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||||
SetJumpTarget(continue1);
|
FixupBranch continue2 = J();
|
||||||
SetJumpTarget(continue2);
|
|
||||||
|
SetJumpTarget(pLesser);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||||
|
FixupBranch continue3;
|
||||||
|
if (!!(8 & test_bit) == condition) continue3 = J();
|
||||||
|
|
||||||
|
//if (!!(8 & test_bit) != condition) SetJumpTarget(continue3);
|
||||||
|
if (!!(4 & test_bit) != condition) SetJumpTarget(continue2);
|
||||||
|
if (!!(2 & test_bit) != condition) SetJumpTarget(continue1);
|
||||||
|
|
||||||
|
WriteExit(destination, 0);
|
||||||
|
|
||||||
|
if (!!(8 & test_bit) == condition) SetJumpTarget(continue3);
|
||||||
|
if (!!(4 & test_bit) == condition) SetJumpTarget(continue2);
|
||||||
|
if (!!(2 & test_bit) == condition) SetJumpTarget(continue1);
|
||||||
|
|
||||||
|
WriteExit(js.next_compilerPC + 4, 1);
|
||||||
|
|
||||||
|
js.cancel = true;
|
||||||
|
}
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
|
||||||
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
|
|
||||||
// Since it's the last block, some liberties can be taken.
|
|
||||||
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void orx(UGeckoInstruction inst)
|
void orx(UGeckoInstruction inst)
|
||||||
|
|
|
@ -266,9 +266,14 @@ void DolphinApp::OnEndSession()
|
||||||
|
|
||||||
bool wxMsgAlert(const char* caption, const char* text,
|
bool wxMsgAlert(const char* caption, const char* text,
|
||||||
bool yes_no) {
|
bool yes_no) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
// I like parentless messageboxes - don't block the debug window.
|
||||||
|
return IDYES == MessageBox(0, text, caption, yes_no?MB_YESNO:MB_OK);
|
||||||
|
#else
|
||||||
return wxYES == wxMessageBox(wxString::FromAscii(text),
|
return wxYES == wxMessageBox(wxString::FromAscii(text),
|
||||||
wxString::FromAscii(caption),
|
wxString::FromAscii(caption),
|
||||||
(yes_no)?wxYES_NO:wxOK);
|
(yes_no)?wxYES_NO:wxOK);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue