Jit64/JitIL: Enabled block merging to improve performance. This improves the fps by 4-5 in some games. However, it decreases the fps by 10 in other games, such as MP2. In this commit, the actual block merging is disabled. If you want to try block merging, please set FUNCTION_FOLLOWING_THRESHOLD to a positive integer.

Increased the size of code buffer to prevent cache clearing with block merging. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6193 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-09-09 02:14:03 +00:00 · 2010-09-09 02:14:03 +00:00 · ccb96be9b3
parent bf4a18e08c
commit ccb96be9b3
5 changed files with 48 additions and 29 deletions
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp
@ -158,7 +158,7 @@ ps_adds1

 */

-static int CODE_SIZE = 1024*1024*16;
+static int CODE_SIZE = 1024*1024*32;

 namespace CPUCompare
 {
--- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Branch.cpp
@ -81,36 +81,39 @@ void Jit64::bx(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(Branch)

+	// We must always execute the following statement,
+	// even if the blocks are merged by PPCAnalyst::Flatten().
 	if (inst.LK)
 		MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
+	// If this is not the last instruction of a block,
+	// we skip the rest of the processing
+	// because PPCAnalyst::Flatten() merged the blocks.
+	if (!js.isLastInstruction) {
+		return;
+	}
+
 	gpr.Flush(FLUSH_ALL);
 	fpr.Flush(FLUSH_ALL);

-	if (js.isLastInstruction)
-	{
-		u32 destination;
-		if (inst.AA)
-			destination = SignExt26(inst.LI << 2);
-		else
-			destination = js.compilerPC + SignExt26(inst.LI << 2);
+	u32 destination;
+	if (inst.AA)
+		destination = SignExt26(inst.LI << 2);
+	else
+		destination = js.compilerPC + SignExt26(inst.LI << 2);
 #ifdef ACID_TEST
-		if (inst.LK)
-			AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
+	if (inst.LK)
+		AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
 #endif
-		if (destination == js.compilerPC)
-		{
-			//PanicAlert("Idle loop detected at %08x", destination);
+	if (destination == js.compilerPC)
+	{
+		//PanicAlert("Idle loop detected at %08x", destination);
 		//	CALL(ProtectFunction(&CoreTiming::Idle, 0));
 		//	JMP(Asm::testExceptions, true);
-			// make idle loops go faster
-			js.downcountAmount += 8;
-		}
-		WriteExit(destination, 0);
-	}
-	else {
-		// TODO: investigate the good old method of merging blocks here.
-		PanicAlert("bx not last instruction of block"); // this should not happen
+		// make idle loops go faster
+		js.downcountAmount += 8;
 	}
+	WriteExit(destination, 0);
 }

 // TODO - optimize to hell and beyond
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL.cpp
@ -152,7 +152,7 @@ ps_adds1

 */

-static int CODE_SIZE = 1024*1024*16;
+static int CODE_SIZE = 1024*1024*32;

 namespace CPUCompare
 {
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Branch.cpp
@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst)
 	NORMALBRANCH_START
 	INSTRUCTION_START;

+	// We must always execute the following statement,
+	// even if the blocks are merged by PPCAnalyst::Flatten().
 	if (inst.LK)
 		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

+	// If this is not the last instruction of a block,
+	// we skip the rest of the processing
+	// because PPCAnalyst::Flatten() merged the blocks.
+	if (!js.isLastInstruction) {
+		return;
+	}
+
 	u32 destination;
 	if (inst.AA)
 		destination = SignExt26(inst.LI << 2);
--- a/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/Src/PowerPC/PPCAnalyst.cpp
@ -40,10 +40,9 @@ namespace PPCAnalyst {

 using namespace std;

-enum
-{
-	CODEBUFFER_SIZE = 32000,
-};
+static const int CODEBUFFER_SIZE = 32000;
+// 0 does not perform block merging
+static const int FUNCTION_FOLLOWING_THRESHOLD = 0;

 CodeBuffer::CodeBuffer(int size)
 {
@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
 			}
 			if (follow)
 				numFollows++;
-			if (numFollows > 1)
+			// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
+			//       If it is too small, performance will suffer.
+			//       If it is too big, the generated code will be large and
+			//       cache clearing will happen many times.
+			// TODO: Investigate the reason why
+			//       "0" is fastest in some games, MP2 for example.
+			if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
 				follow = false;
-			follow = false;
+
 			if (!follow)
 			{
 				if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
 			}
 			else
 			{
-				code[i].skip = true;
+				// We don't set "code[i].skip = true" here
+				// because bx may store a value to the link register.
+				// Instead, we skip a part of bx in Jit**::bx().
 				address = destination;
 			}
 		}