Optimised the JIT cache lookup in JIT and JITIL. Gives a speed-up of less than 5% in GameCube games. Wii games, and GameCube games running with the MMU option enabled, get a smaller speed-up.

Cleaned up some code in the JIT and memory functions.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6129 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-08-26 11:06:47 +00:00
parent 9c98d0ab85
commit ea7004ffa7
8 changed files with 108 additions and 116 deletions

View File

@ -421,13 +421,7 @@ u32 Read_Instruction(const u32 em_address)
u32 Read_Opcode_JIT(u32 _Address) u32 Read_Opcode_JIT(u32 _Address)
{ {
#ifdef FAST_ICACHE #ifdef FAST_ICACHE
if (bMMU && !bFakeVMEM && if (bMMU && !bFakeVMEM && (_Address & ADDR_MASK_MEM1))
(_Address >> 28) != 0x0 &&
(_Address >> 28) != 0x8 &&
(_Address >> 28) != 0x9 &&
(_Address >> 28) != 0xC &&
(_Address >> 28) != 0xD
)
{ {
_Address = Memory::TranslateAddress(_Address, FLAG_OPCODE); _Address = Memory::TranslateAddress(_Address, FLAG_OPCODE);
if (_Address == 0) if (_Address == 0)
@ -439,22 +433,6 @@ u32 Read_Opcode_JIT(u32 _Address)
u32 inst = PowerPC::ppcState.iCache.ReadInstruction(_Address); u32 inst = PowerPC::ppcState.iCache.ReadInstruction(_Address);
#else #else
u32 inst = Memory::ReadUnchecked_U32(_Address); u32 inst = Memory::ReadUnchecked_U32(_Address);
#endif
// if a crash occurred after that message
// that means that we have compiled outdated code from the cache instead of memory
// this could happen if a game forgot to icbi the new code
#if defined(_DEBUG) || defined(DEBUGFAST)
u32 inst_mem = Memory::ReadUnchecked_U32(_Address);
if (inst_mem != inst)
ERROR_LOG(POWERPC, "JIT: compiling outdated code. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
inst = Read_Opcode_JIT_LC(_Address);
if (inst_mem != inst)
{
ERROR_LOG(POWERPC, "JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
PanicAlert("JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
Write_Opcode_JIT(_Address, inst_mem);
}
#endif #endif
return inst; return inst;
} }

View File

@ -89,10 +89,6 @@ inline u64 bswap(u64 val) {return Common::swap64(val);}
// Read and write // Read and write
// ---------------- // ----------------
// The read and write macros that direct us to the right functions // The read and write macros that direct us to the right functions
// ----------------
/* Instructions: To test the TLB functions in F-Zero disable the "&& ((_Address & 0xFE000000)
== 0x7e000000)" condition next to bFakeVMEM below. */
// ----------------
// All these little inline functions are needed because we can't paste symbols together in templates // All these little inline functions are needed because we can't paste symbols together in templates
// like we can in macros. // like we can in macros.
@ -327,11 +323,8 @@ u32 Read_Opcode(u32 _Address)
} }
if (Core::g_CoreStartupParameter.bMMU && if (Core::g_CoreStartupParameter.bMMU &&
(_Address >> 28) != 0x0 && !Core::g_CoreStartupParameter.iTLBHack &&
(_Address >> 28) != 0x8 && (_Address & ADDR_MASK_MEM1))
(_Address >> 28) != 0x9 &&
(_Address >> 28) != 0xC &&
(_Address >> 28) != 0xD)
{ {
// TODO: Check for MSR instruction address translation flag before translating // TODO: Check for MSR instruction address translation flag before translating
u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE); u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE);

View File

@ -53,7 +53,7 @@ using namespace PowerPC;
// * Fast dispatcher // * Fast dispatcher
// Unfeatures: // Unfeatures:
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function. // * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
// Various notes below // Various notes below
@ -73,8 +73,6 @@ using namespace PowerPC;
// Open questions // Open questions
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp // * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns. // * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
// This can even be seen in one homebrew Wii demo - RayTracer.elf
// Other considerations // Other considerations
// //
@ -419,13 +417,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (em_address == 0) if (em_address == 0)
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
if (Core::g_CoreStartupParameter.bMMU && if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
(em_address >> 28) != 0x0 &&
(em_address >> 28) != 0x8 &&
(em_address >> 28) != 0x9 &&
(em_address >> 28) != 0xC &&
(em_address >> 28) != 0xD
)
{ {
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE)) if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
{ {

View File

@ -160,12 +160,19 @@ void Jit64AsmRoutineManager::Generate()
SetJumpTarget(needinst); SetJumpTarget(needinst);
#ifdef JIT_UNLIMITED_ICACHE #ifdef JIT_UNLIMITED_ICACHE
u32 mask = 0;
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); FixupBranch no_mem;
FixupBranch vmem = J_CC(CC_NZ); FixupBranch exit_mem;
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); FixupBranch exit_vmem;
FixupBranch exram = J_CC(CC_NZ); if (Core::g_CoreStartupParameter.bWii)
mask = JIT_ICACHE_EXRAM_BIT;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
mask |= JIT_ICACHE_VMEM_BIT;
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(mask));
no_mem = J_CC(CC_NZ);
}
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86 #ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache())); MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
@ -173,21 +180,15 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache())); MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif #endif
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
FixupBranch getinst = J(); {
SetJumpTarget(exram); exit_mem = J();
SetJumpTarget(no_mem);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); }
#ifdef _M_IX86 if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx())); {
#else TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx())); FixupBranch no_vmem = J_CC(CC_Z);
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst2 = J();
SetJumpTarget(vmem);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86 #ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM())); MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
@ -195,9 +196,26 @@ void Jit64AsmRoutineManager::Generate()
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM())); MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif #endif
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(getinst2); SetJumpTarget(no_vmem);
SetJumpTarget(getinst); }
if (Core::g_CoreStartupParameter.bWii)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
SetJumpTarget(no_exram);
}
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
SetJumpTarget(exit_mem);
if (Core::g_CoreStartupParameter.bWii)
SetJumpTarget(exit_vmem);
#else #else
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));

View File

@ -47,7 +47,7 @@ using namespace PowerPC;
// * Fast dispatcher // * Fast dispatcher
// Unfeatures: // Unfeatures:
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function. // * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
// Various notes below // Various notes below
@ -67,8 +67,6 @@ using namespace PowerPC;
// Open questions // Open questions
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp // * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns. // * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
// This can even be seen in one homebrew Wii demo - RayTracer.elf
// Other considerations // Other considerations
// //
@ -412,12 +410,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (em_address == 0) if (em_address == 0)
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR); PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
if (Core::g_CoreStartupParameter.bMMU && if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
(em_address >> 28) != 0x0 &&
(em_address >> 28) != 0x8 &&
(em_address >> 28) != 0x9 &&
(em_address >> 28) != 0xC &&
(em_address >> 28) != 0xD)
{ {
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE)) if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
{ {

View File

@ -167,12 +167,19 @@ void JitILAsmRoutineManager::Generate()
SetJumpTarget(needinst); SetJumpTarget(needinst);
#ifdef JIT_UNLIMITED_ICACHE #ifdef JIT_UNLIMITED_ICACHE
u32 mask = 0;
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); FixupBranch no_mem;
FixupBranch vmem = J_CC(CC_NZ); FixupBranch exit_mem;
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); FixupBranch exit_vmem;
FixupBranch exram = J_CC(CC_NZ); if (Core::g_CoreStartupParameter.bWii)
mask = JIT_ICACHE_EXRAM_BIT;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
mask |= JIT_ICACHE_VMEM_BIT;
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(mask));
no_mem = J_CC(CC_NZ);
}
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86 #ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache())); MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
@ -180,21 +187,15 @@ void JitILAsmRoutineManager::Generate()
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache())); MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif #endif
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
FixupBranch getinst = J(); {
SetJumpTarget(exram); exit_mem = J();
SetJumpTarget(no_mem);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); }
#ifdef _M_IX86 if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx())); {
#else TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx())); FixupBranch no_vmem = J_CC(CC_Z);
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst2 = J();
SetJumpTarget(vmem);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86 #ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM())); MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
@ -202,9 +203,26 @@ void JitILAsmRoutineManager::Generate()
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM())); MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif #endif
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(getinst); SetJumpTarget(no_vmem);
SetJumpTarget(getinst2); }
if (Core::g_CoreStartupParameter.bWii)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
SetJumpTarget(no_exram);
}
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
SetJumpTarget(exit_mem);
if (Core::g_CoreStartupParameter.bWii)
SetJumpTarget(exit_vmem);
#else #else
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));

View File

@ -147,7 +147,7 @@ void Init(int cpu_core)
else else
jit = new JitIL; jit = new JitIL;
jit->Init(); jit->Init();
// ... but start as interpreter by default.
mode = MODE_JIT; mode = MODE_JIT;
state = CPU_STEPPING; state = CPU_STEPPING;