From 3de4df29a3ed9ff66798f8bf3d994a31646d4f18 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Mon, 15 Dec 2008 22:57:11 +0000 Subject: [PATCH] merge cmp/branch when possible. smaller than expected (nearly unnoticable) speedup :P slightly bigger if you enable line 291 in ppcanalyst.cpp, but that's not fully tested yet. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1551 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp | 21 ++--- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 3 +- .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 91 ++++++++++++++----- Source/Core/DolphinWX/Src/Main.cpp | 5 + 4 files changed, 86 insertions(+), 34 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index caf9b9f6d8..096ec19840 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -185,7 +185,6 @@ namespace Jit64 jo.assumeFPLoadFromMem = true; jo.fpAccurateFlags = true; jo.optimizeGatherPipe = true; - jo.interpretFPU = false; jo.fastInterrupts = false; } @@ -316,6 +315,7 @@ namespace Jit64 js.curBlock = &b; js.blockSetsQuantizers = false; js.block_flags = 0; + js.cancel = false; //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. @@ -384,6 +384,7 @@ namespace Jit64 js.op = &ops[i]; js.instructionNumber = i; if (i == (int)size - 1) { + // WARNING - cmp->branch merging will screw this up. 
js.isLastInstruction = true; js.next_inst = 0; if (Profiler::g_ProfileBlocks) { @@ -398,27 +399,25 @@ namespace Jit64 } else { // help peephole optimizations js.next_inst = ops[i + 1].inst; + js.next_compilerPC = ops[i + 1].address; } - - // const GekkoOpInfo *info = GetOpInfo(); - if (jo.interpretFPU && PPCTables::UsesFPU(ops[i].inst)) - Default(ops[i].inst); - else - PPCTables::CompileInstruction(ops[i].inst); - gpr.SanityCheck(); - fpr.SanityCheck(); if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) { js.fifoBytesThisBlock -= 32; CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0)); } + + PPCTables::CompileInstruction(ops[i].inst); + + gpr.SanityCheck(); + fpr.SanityCheck(); + if (js.cancel) break; } - js.compilerPC += 4; b.flags = js.block_flags; b.codeSize = (u32)(GetCodePtr() - start); - b.originalSize = js.compilerPC - emaddress; + b.originalSize = size; return normalEntry; } } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 78363d7493..868df7f983 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -48,7 +48,9 @@ namespace Jit64 struct JitState { u32 compilerPC; + u32 next_compilerPC; u32 blockStart; + bool cancel; UGeckoInstruction next_inst; // for easy peephole opt. 
int blockSize; int instructionNumber; @@ -76,7 +78,6 @@ namespace Jit64 bool fpAccurateFlags; bool enableFastMem; bool optimizeGatherPipe; - bool interpretFPU; bool fastInterrupts; }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 14729d8056..e0626b8621 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -154,14 +154,24 @@ namespace Jit64 if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) {Default(inst); return;} // turn off from debugger #endif - // Should check if the next intruction is a branch - if it is, merge the two. This can save - // a whole bunch of instructions and cycles, especially if we aggressively bubble down compares - // towards branches. INSTRUCTION_START; int a = inst.RA; int b = inst.RB; int crf = inst.CRFD; int shift = crf * 4; + + bool merge_branch = false; + int test_crf = js.next_inst.BI >> 2; + // Check if the next instruction is a branch - if it is, merge the two. + if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && + !(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) { + // Looks like a decent conditional branch that we can merge with. + // It only tests CR, not CTR. 
+ if (test_crf == crf) { + merge_branch = true; + } + } + Gen::CCFlags less_than, greater_than; OpArg comparand; if (inst.OPCD == 31) { @@ -193,27 +203,64 @@ namespace Jit64 } } - CMP(32, gpr.R(a), comparand); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); + if (!merge_branch) { + // Keep the normal code separate for clarity. + CMP(32, gpr.R(a), comparand); + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 + FixupBranch continue1 = J(); + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 + FixupBranch continue2 = J(); + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 + SetJumpTarget(continue1); + SetJumpTarget(continue2); + } else { + int test_bit = 8 >> (js.next_inst.BI & 3); + bool condition = (js.next_inst.BO & 8) ? 
false : true; + + u32 destination; + if (js.next_inst.AA) + destination = SignExt16(js.next_inst.BD << 2); + else + destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); + + CMP(32, gpr.R(a), comparand); + gpr.UnlockAll(); + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 + FixupBranch continue1 = J(); + + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 + FixupBranch continue2 = J(); + + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 + FixupBranch continue3; + if (!!(8 & test_bit) == condition) continue3 = J(); + + //if (!!(8 & test_bit) != condition) SetJumpTarget(continue3); + if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + + WriteExit(destination, 0); + + if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); + if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + + WriteExit(js.next_compilerPC + 4, 1); + + js.cancel = true; + } gpr.UnlockAll(); - - // TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches. - // Since it's the last block, some liberties can be taken. - // don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags. } void orx(UGeckoInstruction inst) diff --git a/Source/Core/DolphinWX/Src/Main.cpp b/Source/Core/DolphinWX/Src/Main.cpp index a04cfb8c6c..093d4e9cba 100644 --- a/Source/Core/DolphinWX/Src/Main.cpp +++ b/Source/Core/DolphinWX/Src/Main.cpp @@ -266,9 +266,14 @@ void DolphinApp::OnEndSession() bool wxMsgAlert(const char* caption, const char* text, bool yes_no) { +#ifdef _WIN32 + // I like parentless messageboxes - don't block the debug window. 
+ return IDYES == MessageBox(0, text, caption, yes_no?MB_YESNO:MB_OK); +#else return wxYES == wxMessageBox(wxString::FromAscii(text), wxString::FromAscii(caption), (yes_no)?wxYES_NO:wxOK); +#endif }