Jit64/JitIL: Added support for block merging to improve performance. Block merging makes some games run 4-5 fps faster, but it makes other games (MP2, etc.) run about 10 fps slower. For that reason, the actual block merging is disabled in this commit (FUNCTION_FOLLOWING_THRESHOLD is 0). If you want to try block merging, please set FUNCTION_FOLLOWING_THRESHOLD to a positive integer.
Increased the size of code buffer to prevent cache clearing with block merging. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6193 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
bf4a18e08c
commit
ccb96be9b3
|
@ -158,7 +158,7 @@ ps_adds1
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int CODE_SIZE = 1024*1024*16;
|
static int CODE_SIZE = 1024*1024*32;
|
||||||
|
|
||||||
namespace CPUCompare
|
namespace CPUCompare
|
||||||
{
|
{
|
||||||
|
|
|
@ -81,13 +81,21 @@ void Jit64::bx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(Branch)
|
JITDISABLE(Branch)
|
||||||
|
|
||||||
|
// We must always execute the following statement
|
||||||
|
// even if the blocks are merged by PPCAnalyst::Flatten().
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
||||||
|
|
||||||
|
// If this is not the last instruction of a block,
|
||||||
|
// we skip the rest of the processing.
|
||||||
|
// Because PPCAnalyst::Flatten() merged the blocks.
|
||||||
|
if (!js.isLastInstruction) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
gpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
|
||||||
if (js.isLastInstruction)
|
|
||||||
{
|
|
||||||
u32 destination;
|
u32 destination;
|
||||||
if (inst.AA)
|
if (inst.AA)
|
||||||
destination = SignExt26(inst.LI << 2);
|
destination = SignExt26(inst.LI << 2);
|
||||||
|
@ -107,11 +115,6 @@ void Jit64::bx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
WriteExit(destination, 0);
|
WriteExit(destination, 0);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
// TODO: investigate the good old method of merging blocks here.
|
|
||||||
PanicAlert("bx not last instruction of block"); // this should not happen
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO - optimize to hell and beyond
|
// TODO - optimize to hell and beyond
|
||||||
// TODO - make nice easy to optimize special cases for the most common
|
// TODO - make nice easy to optimize special cases for the most common
|
||||||
|
|
|
@ -152,7 +152,7 @@ ps_adds1
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int CODE_SIZE = 1024*1024*16;
|
static int CODE_SIZE = 1024*1024*32;
|
||||||
|
|
||||||
namespace CPUCompare
|
namespace CPUCompare
|
||||||
{
|
{
|
||||||
|
|
|
@ -59,9 +59,18 @@ void JitIL::bx(UGeckoInstruction inst)
|
||||||
NORMALBRANCH_START
|
NORMALBRANCH_START
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
|
|
||||||
|
// We must always execute the following statement
|
||||||
|
// even if the blocks are merged by PPCAnalyst::Flatten().
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
|
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
|
||||||
|
|
||||||
|
// If this is not the last instruction of a block,
|
||||||
|
// we skip the rest of the processing.
|
||||||
|
// Because PPCAnalyst::Flatten() merged the blocks.
|
||||||
|
if (!js.isLastInstruction) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
u32 destination;
|
u32 destination;
|
||||||
if (inst.AA)
|
if (inst.AA)
|
||||||
destination = SignExt26(inst.LI << 2);
|
destination = SignExt26(inst.LI << 2);
|
||||||
|
|
|
@ -40,10 +40,9 @@ namespace PPCAnalyst {
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
enum
|
static const int CODEBUFFER_SIZE = 32000;
|
||||||
{
|
// 0 does not perform block merging
|
||||||
CODEBUFFER_SIZE = 32000,
|
static const int FUNCTION_FOLLOWING_THRESHOLD = 0;
|
||||||
};
|
|
||||||
|
|
||||||
CodeBuffer::CodeBuffer(int size)
|
CodeBuffer::CodeBuffer(int size)
|
||||||
{
|
{
|
||||||
|
@ -446,9 +445,15 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
|
||||||
}
|
}
|
||||||
if (follow)
|
if (follow)
|
||||||
numFollows++;
|
numFollows++;
|
||||||
if (numFollows > 1)
|
// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
|
||||||
follow = false;
|
// If it is too small, performance will suffer.
|
||||||
|
// If it is big, the size of generated code will be big and
|
||||||
|
// cache clearing will happen many times.
|
||||||
|
// TODO: Investigate the reason why
|
||||||
|
// "0" is fastest in some games, MP2 for example.
|
||||||
|
if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
|
||||||
follow = false;
|
follow = false;
|
||||||
|
|
||||||
if (!follow)
|
if (!follow)
|
||||||
{
|
{
|
||||||
if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
|
if (opinfo->flags & FL_ENDBLOCK) //right now we stop early
|
||||||
|
@ -460,7 +465,9 @@ u32 Flatten(u32 address, int *realsize, BlockStats *st, BlockRegStats *gpa, Bloc
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
code[i].skip = true;
|
// We don't set "code[i].skip = true" here
|
||||||
|
// because bx may store a certain value to the link register.
|
||||||
|
// Instead, we skip a part of bx in Jit**::bx().
|
||||||
address = destination;
|
address = destination;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue