Optimised the JIT cache lookup in JIT and JITIL. Gives a <5% speed-up in GameCube games. Wii games and GC games+MMU get a smaller speed-up.
Cleaned up some code in the JIT and memory functions. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6129 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
9c98d0ab85
commit
ea7004ffa7
|
@ -421,13 +421,7 @@ u32 Read_Instruction(const u32 em_address)
|
|||
u32 Read_Opcode_JIT(u32 _Address)
|
||||
{
|
||||
#ifdef FAST_ICACHE
|
||||
if (bMMU && !bFakeVMEM &&
|
||||
(_Address >> 28) != 0x0 &&
|
||||
(_Address >> 28) != 0x8 &&
|
||||
(_Address >> 28) != 0x9 &&
|
||||
(_Address >> 28) != 0xC &&
|
||||
(_Address >> 28) != 0xD
|
||||
)
|
||||
if (bMMU && !bFakeVMEM && (_Address & ADDR_MASK_MEM1))
|
||||
{
|
||||
_Address = Memory::TranslateAddress(_Address, FLAG_OPCODE);
|
||||
if (_Address == 0)
|
||||
|
@ -439,22 +433,6 @@ u32 Read_Opcode_JIT(u32 _Address)
|
|||
u32 inst = PowerPC::ppcState.iCache.ReadInstruction(_Address);
|
||||
#else
|
||||
u32 inst = Memory::ReadUnchecked_U32(_Address);
|
||||
#endif
|
||||
// if a crash occurred after that message
|
||||
// that means that we have compiled outdated code from the cache instead of memory
|
||||
// this could happen if a game forgot to icbi the new code
|
||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||
u32 inst_mem = Memory::ReadUnchecked_U32(_Address);
|
||||
if (inst_mem != inst)
|
||||
ERROR_LOG(POWERPC, "JIT: compiling outdated code. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
|
||||
|
||||
inst = Read_Opcode_JIT_LC(_Address);
|
||||
if (inst_mem != inst)
|
||||
{
|
||||
ERROR_LOG(POWERPC, "JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
|
||||
PanicAlert("JIT: self-modifying code detected. addr=%x, mem=%x, cache=%x", _Address, inst_mem, inst);
|
||||
Write_Opcode_JIT(_Address, inst_mem);
|
||||
}
|
||||
#endif
|
||||
return inst;
|
||||
}
|
||||
|
|
|
@ -76,9 +76,9 @@ enum
|
|||
ADDR_MASK_HW_ACCESS = 0x0c000000,
|
||||
ADDR_MASK_MEM1 = 0x20000000,
|
||||
|
||||
#ifdef _M_IX86
|
||||
MEMVIEW32_MASK = 0x3FFFFFFF,
|
||||
#endif
|
||||
#ifdef _M_IX86
|
||||
MEMVIEW32_MASK = 0x3FFFFFFF,
|
||||
#endif
|
||||
};
|
||||
|
||||
// Init and Shutdown
|
||||
|
|
|
@ -89,10 +89,6 @@ inline u64 bswap(u64 val) {return Common::swap64(val);}
|
|||
// Read and write
|
||||
// ----------------
|
||||
// The read and write macros that direct us to the right functions
|
||||
// ----------------
|
||||
/* Instructions: To test the TLB functions in F-Zero disable the "&& ((_Address & 0xFE000000)
|
||||
== 0x7e000000)" condition next to bFakeVMEM below. */
|
||||
// ----------------
|
||||
|
||||
// All these little inline functions are needed because we can't paste symbols together in templates
|
||||
// like we can in macros.
|
||||
|
@ -326,12 +322,9 @@ u32 Read_Opcode(u32 _Address)
|
|||
return 0x00000000;
|
||||
}
|
||||
|
||||
if (Core::g_CoreStartupParameter.bMMU &&
|
||||
(_Address >> 28) != 0x0 &&
|
||||
(_Address >> 28) != 0x8 &&
|
||||
(_Address >> 28) != 0x9 &&
|
||||
(_Address >> 28) != 0xC &&
|
||||
(_Address >> 28) != 0xD)
|
||||
if (Core::g_CoreStartupParameter.bMMU &&
|
||||
!Core::g_CoreStartupParameter.iTLBHack &&
|
||||
(_Address & ADDR_MASK_MEM1))
|
||||
{
|
||||
// TODO: Check for MSR instruction address translation flag before translating
|
||||
u32 tlb_addr = Memory::TranslateAddress(_Address, FLAG_OPCODE);
|
||||
|
|
|
@ -53,7 +53,7 @@ using namespace PowerPC;
|
|||
// * Fast dispatcher
|
||||
|
||||
// Unfeatures:
|
||||
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function.
|
||||
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
|
||||
|
||||
// Various notes below
|
||||
|
||||
|
@ -73,8 +73,6 @@ using namespace PowerPC;
|
|||
// Open questions
|
||||
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
|
||||
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
|
||||
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
|
||||
// This can even be seen in one homebrew Wii demo - RayTracer.elf
|
||||
|
||||
// Other considerations
|
||||
//
|
||||
|
@ -419,13 +417,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
if (em_address == 0)
|
||||
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
|
||||
|
||||
if (Core::g_CoreStartupParameter.bMMU &&
|
||||
(em_address >> 28) != 0x0 &&
|
||||
(em_address >> 28) != 0x8 &&
|
||||
(em_address >> 28) != 0x9 &&
|
||||
(em_address >> 28) != 0xC &&
|
||||
(em_address >> 28) != 0xD
|
||||
)
|
||||
if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
|
||||
{
|
||||
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
|
||||
{
|
||||
|
|
|
@ -160,53 +160,71 @@ void Jit64AsmRoutineManager::Generate()
|
|||
|
||||
SetJumpTarget(needinst);
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
FixupBranch vmem = J_CC(CC_NZ);
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
FixupBranch exram = J_CC(CC_NZ);
|
||||
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
u32 mask = 0;
|
||||
FixupBranch no_mem;
|
||||
FixupBranch exit_mem;
|
||||
FixupBranch exit_vmem;
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
mask = JIT_ICACHE_EXRAM_BIT;
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
mask |= JIT_ICACHE_VMEM_BIT;
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(mask));
|
||||
no_mem = J_CC(CC_NZ);
|
||||
}
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
|
||||
#else
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
FixupBranch getinst = J();
|
||||
SetJumpTarget(exram);
|
||||
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
exit_mem = J();
|
||||
SetJumpTarget(no_mem);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
FixupBranch no_vmem = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
#else
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
FixupBranch getinst2 = J();
|
||||
SetJumpTarget(vmem);
|
||||
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
|
||||
SetJumpTarget(no_vmem);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
FixupBranch no_exram = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
|
||||
#else
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
SetJumpTarget(getinst2);
|
||||
SetJumpTarget(getinst);
|
||||
SetJumpTarget(no_exram);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
SetJumpTarget(exit_mem);
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
SetJumpTarget(exit_vmem);
|
||||
#else
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EBX), Imm32((u32)Memory::base));
|
||||
MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EBX), Imm32((u32)Memory::base));
|
||||
MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
|
||||
#else
|
||||
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
|
||||
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
JMP(haveinst, true);
|
||||
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ using namespace PowerPC;
|
|||
// * Fast dispatcher
|
||||
|
||||
// Unfeatures:
|
||||
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function.
|
||||
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding Interpreter function.
|
||||
|
||||
// Various notes below
|
||||
|
||||
|
@ -67,8 +67,6 @@ using namespace PowerPC;
|
|||
// Open questions
|
||||
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
|
||||
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
|
||||
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
|
||||
// This can even be seen in one homebrew Wii demo - RayTracer.elf
|
||||
|
||||
// Other considerations
|
||||
//
|
||||
|
@ -412,12 +410,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
if (em_address == 0)
|
||||
PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);
|
||||
|
||||
if (Core::g_CoreStartupParameter.bMMU &&
|
||||
(em_address >> 28) != 0x0 &&
|
||||
(em_address >> 28) != 0x8 &&
|
||||
(em_address >> 28) != 0x9 &&
|
||||
(em_address >> 28) != 0xC &&
|
||||
(em_address >> 28) != 0xD)
|
||||
if (Core::g_CoreStartupParameter.bMMU && (em_address & JIT_ICACHE_VMEM_BIT))
|
||||
{
|
||||
if (!Memory::TranslateAddress(em_address, Memory::FLAG_OPCODE))
|
||||
{
|
||||
|
|
|
@ -167,12 +167,19 @@ void JitILAsmRoutineManager::Generate()
|
|||
|
||||
SetJumpTarget(needinst);
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
FixupBranch vmem = J_CC(CC_NZ);
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
FixupBranch exram = J_CC(CC_NZ);
|
||||
|
||||
u32 mask = 0;
|
||||
FixupBranch no_mem;
|
||||
FixupBranch exit_mem;
|
||||
FixupBranch exit_vmem;
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
mask = JIT_ICACHE_EXRAM_BIT;
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
mask |= JIT_ICACHE_VMEM_BIT;
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(mask));
|
||||
no_mem = J_CC(CC_NZ);
|
||||
}
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICache()));
|
||||
|
@ -180,31 +187,42 @@ void JitILAsmRoutineManager::Generate()
|
|||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICache()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
FixupBranch getinst = J();
|
||||
SetJumpTarget(exram);
|
||||
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
exit_mem = J();
|
||||
SetJumpTarget(no_mem);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
FixupBranch no_vmem = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
#else
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
FixupBranch getinst2 = J();
|
||||
SetJumpTarget(vmem);
|
||||
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
|
||||
SetJumpTarget(no_vmem);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
FixupBranch no_exram = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
|
||||
#ifdef _M_IX86
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)jit->GetBlockCache()->GetICacheEx()));
|
||||
#else
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheVMEM()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->GetICacheEx()));
|
||||
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
|
||||
SetJumpTarget(getinst);
|
||||
SetJumpTarget(getinst2);
|
||||
SetJumpTarget(no_exram);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.iTLBHack)
|
||||
SetJumpTarget(exit_mem);
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
SetJumpTarget(exit_vmem);
|
||||
#else
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
|
@ -213,7 +231,7 @@ void JitILAsmRoutineManager::Generate()
|
|||
#else
|
||||
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
JMP(haveinst, true);
|
||||
|
||||
GenerateCommon();
|
||||
|
|
|
@ -147,7 +147,7 @@ void Init(int cpu_core)
|
|||
else
|
||||
jit = new JitIL;
|
||||
jit->Init();
|
||||
// ... but start as interpreter by default.
|
||||
|
||||
mode = MODE_JIT;
|
||||
state = CPU_STEPPING;
|
||||
|
||||
|
|
Loading…
Reference in New Issue