Optimised the JIT cache lookup in the JIT and JITIL cores. This gives a speed-up of less than 5% in GameCube games; Wii games and GameCube games running with the MMU enabled get a smaller speed-up.

Cleaned up some code in the JIT and memory functions.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6129 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-08-26 11:06:47 +00:00
parent 9c98d0ab85
commit ea7004ffa7
8 changed files with 108 additions and 116 deletions

View File

@ -421,13 +421,7 @@ u32 Read_Instruction(const u32 em_address)
u32 Read_Opcode_JIT(u32 _Address)
{
#ifdef FAST_ICACHE
if (bMMU && !bFakeVMEM &&
(_Address >> 28) != 0x0 &&
(_Address >> 28) != 0x8 &&
(_Address >> 28) != 0x9 &&
(_Address >> 28) != 0xC &&
(_Address >> 28) != 0xD
)
if (bMMU && !bFakeVMEM && (_Address & ADDR_MASK_MEM1))
{
_Address = Memory::TranslateAddress(_Address, FLAG_OPCODE);
if (_Address == 0)
@ -439,22 +433,6 @@ u32 Read_Opcode_JIT(u32 _Address)
u32 inst = PowerPC::ppcState.iCache.ReadInstruction(_Address);
#else
u32 inst = Memory::ReadUnchecked_U32(_Address);
#endif
// if a crash occurred after that message
// that means that we have compiled outdated code from the cache instead of memory
// this could happen if a game forgot to icbi the new code
#if defined(_DEBUG) || defined(DEBUGFAST)
u32 inst_mem = Memory::ReadUnchecked_U32(_Address);
if (inst_mem != inst)
ERROR_LOG(POWERPC, "JIT: compiling outdated code. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
inst = Read_Opcode_JIT_LC(_Address);
if (inst_mem != inst)
{
ERROR_LOG(POWERPC, "JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
PanicAlert("JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
Write_Opcode_JIT(_Address, inst_mem);
}
#endif
return inst;
}

View File

@ -76,9 +76,9 @@ enum
ADDR_MASK_HW_ACCESS = 0x0c000000,
ADDR_MASK_MEM1 = 0x20000000,
#ifdef _M_IX86
MEMVIEW32_MASK = 0x3FFFFFFF,
#endif
#ifdef _M_IX86
MEMVIEW32_MASK = 0x3FFFFFFF,
#endif
};
// Init and Shutdown

View File

@ -89,10 +89,6 @@ inline u64 bswap(u64 val) {return Common::swap64(val);}
// Read and write
// ----------------
// The read and write macros that direct us to the right functions
// ----------------
/* Instructions: To test the TLB functions in F-Zero disable the "&& ((_Address & 0xFE000000)
== 0x7e000000)" condition next to bFakeVMEM below. */
// ----------------
// All these little inline functions are needed because we can't paste symbols together in templates
// like we can in macros.
@ -327,11 +323,8 @@ u32 Read_Opcode(u32 _Address)
}
if (Core::g_CoreStartupParameter.bMMU &&
(_Address >> 28) != 0x0 &&
(_Address >> 28) != 0x8 &&
(_Address >> 28) != 0x9 &&
(_Address >> 28) != 0xC &&
(_Address >> 28) != 0xD)
!Core::g_CoreStartupParameter.iTLBHack &&
(_Address & ADDR_MASK_MEM1))
{
// TODO: Check for MSR instruction address translation flag before translating
u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE);

View File

@ -53,7 +53,7 @@ using namespace PowerPC;
// * Fast dispatcher
// Unfeatures:
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function.
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
// Various notes below
@ -73,8 +73,6 @@ using namespace PowerPC;
// Open questions
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
// This can even be seen in one homebrew Wii demo - RayTracer.elf
// Other considerations
//
@ -419,13 +417,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (em_address == 0)
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
if (Core::g_CoreStartupParameter.bMMU &&
(em_address >> 28) != 0x0 &&
(em_address >> 28) != 0x8 &&
(em_address >> 28) != 0x9 &&
(em_address >> 28) != 0xC &&
(em_address >> 28) != 0xD
)
if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
{
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
{

View File

@ -160,51 +160,69 @@ void Jit64AsmRoutineManager::Generate()
SetJumpTarget(needinst);
#ifdef JIT_UNLIMITED_ICACHE
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch vmem = J_CC(CC_NZ);
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch exram = J_CC(CC_NZ);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
u32 mask = 0;
FixupBranch no_mem;
FixupBranch exit_mem;
FixupBranch exit_vmem;
if (Core::g_CoreStartupParameter.bWii)
mask = JIT_ICACHE_EXRAM_BIT;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
mask |= JIT_ICACHE_VMEM_BIT;
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(mask));
no_mem = J_CC(CC_NZ);
}
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst = J();
SetJumpTarget(exram);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
exit_mem = J();
SetJumpTarget(no_mem);
}
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst2 = J();
SetJumpTarget(vmem);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(no_vmem);
}
if (Core::g_CoreStartupParameter.bWii)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
SetJumpTarget(getinst2);
SetJumpTarget(getinst);
SetJumpTarget(no_exram);
}
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
SetJumpTarget(exit_mem);
if (Core::g_CoreStartupParameter.bWii)
SetJumpTarget(exit_vmem);
#else
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EBX), Imm32((u32)Memory::base));
MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EBX), Imm32((u32)Memory::base));
MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
#else
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
#endif
#endif
JMP(haveinst, true);

View File

@ -47,7 +47,7 @@ using namespace PowerPC;
// * Fast dispatcher
// Unfeatures:
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function.
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
// Various notes below
@ -67,8 +67,6 @@ using namespace PowerPC;
// Open questions
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
// This can even be seen in one homebrew Wii demo - RayTracer.elf
// Other considerations
//
@ -412,12 +410,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (em_address == 0)
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
if (Core::g_CoreStartupParameter.bMMU &&
(em_address >> 28) != 0x0 &&
(em_address >> 28) != 0x8 &&
(em_address >> 28) != 0x9 &&
(em_address >> 28) != 0xC &&
(em_address >> 28) != 0xD)
if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
{
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
{

View File

@ -167,12 +167,19 @@ void JitILAsmRoutineManager::Generate()
SetJumpTarget(needinst);
#ifdef JIT_UNLIMITED_ICACHE
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch vmem = J_CC(CC_NZ);
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch exram = J_CC(CC_NZ);
u32 mask = 0;
FixupBranch no_mem;
FixupBranch exit_mem;
FixupBranch exit_vmem;
if (Core::g_CoreStartupParameter.bWii)
mask = JIT_ICACHE_EXRAM_BIT;
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
mask |= JIT_ICACHE_VMEM_BIT;
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(mask));
no_mem = J_CC(CC_NZ);
}
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
@ -180,31 +187,42 @@ void JitILAsmRoutineManager::Generate()
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst = J();
SetJumpTarget(exram);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
exit_mem = J();
SetJumpTarget(no_mem);
}
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
FixupBranch getinst2 = J();
SetJumpTarget(vmem);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(no_vmem);
}
if (Core::g_CoreStartupParameter.bWii)
{
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
#ifdef _M_IX86
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
#else
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
#endif
SetJumpTarget(getinst);
SetJumpTarget(getinst2);
SetJumpTarget(no_exram);
}
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
SetJumpTarget(exit_mem);
if (Core::g_CoreStartupParameter.bWii)
SetJumpTarget(exit_vmem);
#else
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));

View File

@ -147,7 +147,7 @@ void Init(int cpu_core)
else
jit = new JitIL;
jit->Init();
// ... but start as interpreter by default.
mode = MODE_JIT;
state = CPU_STEPPING;