MMU Speed Optimisations:

* Reverted the Load/Store code clean-up from r6032.  I think this will fix the stability issues introduced in r6032
* Added memory exception checking to a couple more JIT FPU instructions
* Slightly optimised the 64-bit fast memory access
* Optimised the MMU speed hack.

The rest of the speed optimisations from r6032 have been retained.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6034 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-08-02 10:28:37 +00:00
parent 7b83640a28
commit fcf2fb2b9b
3 changed files with 53 additions and 28 deletions

View File

@ -44,9 +44,11 @@ void Jit64::lbzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -54,11 +56,13 @@ void Jit64::lbzx(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -69,9 +73,11 @@ void Jit64::lhax(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -80,11 +86,13 @@ void Jit64::lhax(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -95,9 +103,11 @@ void Jit64::lwzx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(b);
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
{ {
gpr.Lock(a);
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(ABI_PARAM1), gpr.R(a));
} }
@ -105,11 +115,13 @@ void Jit64::lwzx(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, (b == d || a == d), true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -207,21 +219,25 @@ void Jit64::lXz(UGeckoInstruction inst)
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
BSWAP(32, gpr.R(d).GetSimpleReg()); BSWAP(32, gpr.R(d).GetSimpleReg());
gpr.UnlockAll(); gpr.UnlockAll();
gpr.Flush(FLUSH_ALL);
} }
else else
{ {
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
gpr.LoadToX64(a, true, false);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, a == d, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
} }
@ -236,16 +252,19 @@ void Jit64::lha(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring // Safe and boring
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, d == a, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -268,12 +287,14 @@ void Jit64::lwzux(UGeckoInstruction inst)
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(d); gpr.Lock(d);
gpr.LoadToX64(d, b == d, true);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END MEMCHECK_END
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
// Zero cache line. // Zero cache line.
@ -392,7 +413,9 @@ void Jit64::stX(UGeckoInstruction inst)
//Still here? Do regular path. //Still here? Do regular path.
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a); gpr.Lock(s, a);
if (update && offset)
gpr.LoadToX64(a, true, true);
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s)); MOV(32, R(ABI_PARAM1), gpr.R(s));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset); SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, accessSize, offset);
@ -400,9 +423,8 @@ void Jit64::stX(UGeckoInstruction inst)
if (update && offset) if (update && offset)
{ {
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a); ADD(32, gpr.R(a), Imm32(offset));
MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END MEMCHECK_END
} }

View File

@ -260,8 +260,6 @@ void Jit64::stfs(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(LoadStoreFloating) JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
bool update = inst.OPCD & 1; bool update = inst.OPCD & 1;
int s = inst.RS; int s = inst.RS;
int a = inst.RA; int a = inst.RA;
@ -301,7 +299,11 @@ void Jit64::stfs(UGeckoInstruction inst)
ADD(32, R(ABI_PARAM2), Imm32(offset)); ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset) if (update && offset)
{ {
MEMCHECK_START
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
MEMCHECK_END
} }
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
SafeWriteFloatToReg(XMM0, ABI_PARAM2); SafeWriteFloatToReg(XMM0, ABI_PARAM2);
@ -316,8 +318,6 @@ void Jit64::stfsx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(LoadStoreFloating) JITDISABLE(LoadStoreFloating)
if (js.memcheck) { Default(inst); return; }
// We can take a shortcut here - it's not likely that a hardware access would use this instruction. // We can take a shortcut here - it's not likely that a hardware access would use this instruction.
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
fpr.Lock(inst.RS); fpr.Lock(inst.RS);

View File

@ -74,8 +74,9 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend) void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset, bool signExtend)
{ {
if (Core::g_CoreStartupParameter.bUseFastMem && accessSize == 32 && !Core::g_CoreStartupParameter.bMMU) if (Core::g_CoreStartupParameter.bUseFastMem && (accessSize == 32 || accessSize == 8) && !Core::g_CoreStartupParameter.bMMU)
{ {
// FIXME: accessSize == 16 does not work. Breaks mkdd
UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend); UnsafeLoadRegToReg(reg_addr, EAX, accessSize, offset, signExtend);
} }
else else
@ -87,8 +88,11 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
FixupBranch addr20; FixupBranch addr20;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{ {
CMP(32, R(reg_addr), Imm32(0xf0000000)); if (Core::g_CoreStartupParameter.bMMU)
addrf0 = J_CC(CC_GE); {
CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE);
}
TEST(32, R(reg_addr), Imm32(0x20000000)); TEST(32, R(reg_addr), Imm32(0x20000000));
addr20 = J_CC(CC_NZ); addr20 = J_CC(CC_NZ);
} }
@ -98,8 +102,11 @@ void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg_addr, int accessSize, s32 offset,
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{ {
if (Core::g_CoreStartupParameter.bMMU)
{
SetJumpTarget(addrf0);
}
SetJumpTarget(addr20); SetJumpTarget(addr20);
SetJumpTarget(addrf0);
} }
switch (accessSize) switch (accessSize)
@ -144,8 +151,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
// TODO: Figure out a cleaner way to check memory bounds // TODO: Figure out a cleaner way to check memory bounds
FixupBranch addrf0; FixupBranch addrf0;
FixupBranch addr20; FixupBranch addr20;
FixupBranch fast; if (Core::g_CoreStartupParameter.bMMU)
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{ {
CMP(32, R(reg_addr), Imm32(0xf0000000)); CMP(32, R(reg_addr), Imm32(0xf0000000));
addrf0 = J_CC(CC_GE); addrf0 = J_CC(CC_GE);
@ -153,16 +159,13 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
addr20 = J_CC(CC_NZ); addr20 = J_CC(CC_NZ);
} }
if (!Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bUseFastMem) TEST(32, R(reg_addr), Imm32(0x0C000000));
{ FixupBranch fast = J_CC(CC_Z);
TEST(32, R(reg_addr), Imm32(0x0C000000));
fast = J_CC(CC_Z);
}
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack) if (Core::g_CoreStartupParameter.bMMU)
{ {
SetJumpTarget(addr20);
SetJumpTarget(addrf0); SetJumpTarget(addrf0);
SetJumpTarget(addr20);
} }
switch (accessSize) switch (accessSize)