MMU Speed Optimisations:

* Un-cleaned (i.e. reverted the clean-up of) the Load/Store code. I think this will fix the stability issues from r6032.
* Added memory exception checking to a couple more JIT FPU instructions.
* Optimised the 64-bit fast memory access slightly.
* Optimised the MMU speed hack.

The rest of the speed optimisations from r6032 have been retained.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6034 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-08-02 10:28:37 +00:00
parent 7b83640a28
commit fcf2fb2b9b
3 changed files with 53 additions and 28 deletions

View File

@ -44,9 +44,11 @@ void Jit64::lbzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
@ -54,11 +56,13 @@ void Jit64::lbzx(UGeckoInstruction inst)
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -69,9 +73,11 @@ void Jit64::lhax(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
@ -80,11 +86,13 @@ void Jit64::lhax(UGeckoInstruction inst)
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -95,9 +103,11 @@ void Jit64::lwzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a)
{
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a));
}
@ -105,11 +115,13 @@ void Jit64::lwzx(UGeckoInstruction inst)
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -207,21 +219,25 @@ void Jit64::lXz(UGeckoInstruction inst)
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
BSWAP(32, gpr.R(d).GetSimpleReg());
gpr.UnlockAll();
gpr.Flush(FLUSH_ALL);
}
else
{
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
gpr.LoadToX64(a, true, false);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, a == d, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
}
@ -236,16 +252,19 @@ void Jit64::lha(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, d == a, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -268,12 +287,14 @@ void Jit64::lwzux(UGeckoInstruction inst)
MEMCHECK_START
gpr.KillImmediate(d);
gpr.Lock(d);
gpr.LoadToX64(d, b == d, true);
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX();
}
// Zero cache line.
@ -392,7 +413,9 @@ void Jit64::stX(UGeckoInstruction inst)
//Still here? Do regular path.
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
gpr.Lock(s, a);
if (update && offset)
gpr.LoadToX64(a, true, true);
MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset);
@ -400,9 +423,8 @@ void Jit64::stX(UGeckoInstruction inst)
if (update && offset)
{
MEMCHECK_START
gpr.KillImmediate(a);
MOV(32, gpr.R(a), R(ABI_PARAM2));
ADD(32, gpr.R(a), Imm32(offset));
MEMCHECK_END
}

View File

@ -260,8 +260,6 @@ void Jit64::stfs(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
bool update = inst.OPCD & 1;
int s = inst.RS;
int a = inst.RA;
@ -301,7 +299,11 @@ void Jit64::stfs(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset)
{
MEMCHECK_START
MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END
}
CVTSD2SS(XMM0, fpr.R(s));
SafeWriteFloatToReg(XMM0, ABI_PARAM2);
@ -316,8 +318,6 @@ void Jit64::stfsx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
gpr.FlushLockX(ABI_PARAM1);
fpr.Lock(inst.RS);

View File

@ -74,8 +74,9 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
{
if (Core::g_CoreStartupParameter.bUseFastMem && accessSize == 32 && !Core::g_CoreStartupParameter.bMMU)
if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
{
// FIXME: accessSize == 16 does not work. Breaks mkdd
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
}
else
@ -87,8 +88,11 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
FixupBranch addr20;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
if (Core::g_CoreStartupParameter.bMMU)
{
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
}
TEST(32, R(reg_addr), Imm32(0x20000000));
addr20 = J_CC(CC_NZ);
}
@ -98,8 +102,11 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
if (Core::g_CoreStartupParameter.bMMU)
{
SetJumpTarget(addrf0);
}
SetJumpTarget(addr20);
SetJumpTarget(addrf0);
}
switch (accessSize)
@ -144,8 +151,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
// TODO: Figure out a cleaner way to check memory bounds
FixupBranch addrf0;
FixupBranch addr20;
FixupBranch fast;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
if (Core::g_CoreStartupParameter.bMMU)
{
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
@ -153,16 +159,13 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
addr20 = J_CC(CC_NZ);
}
if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem)
{
TEST(32, R(reg_addr), Imm32(0x0C000000));
fast = J_CC(CC_Z);
}
TEST(32, R(reg_addr), Imm32(0x0C000000));
FixupBranch fast = J_CC(CC_Z);
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
if (Core::g_CoreStartupParameter.bMMU)
{
SetJumpTarget(addr20);
SetJumpTarget(addrf0);
SetJumpTarget(addr20);
}
switch (accessSize)